inspect-ai 0.3.58__py3-none-any.whl → 0.3.60__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/common.py +3 -1
- inspect_ai/_cli/eval.py +15 -9
- inspect_ai/_display/core/active.py +4 -1
- inspect_ai/_display/core/config.py +3 -3
- inspect_ai/_display/core/panel.py +7 -3
- inspect_ai/_display/plain/__init__.py +0 -0
- inspect_ai/_display/plain/display.py +203 -0
- inspect_ai/_display/rich/display.py +0 -5
- inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
- inspect_ai/_display/textual/widgets/samples.py +79 -12
- inspect_ai/_display/textual/widgets/sandbox.py +37 -0
- inspect_ai/_eval/eval.py +10 -1
- inspect_ai/_eval/loader.py +79 -19
- inspect_ai/_eval/registry.py +6 -0
- inspect_ai/_eval/score.py +3 -1
- inspect_ai/_eval/task/results.py +51 -22
- inspect_ai/_eval/task/run.py +47 -13
- inspect_ai/_eval/task/sandbox.py +10 -5
- inspect_ai/_util/constants.py +1 -0
- inspect_ai/_util/port_names.py +61 -0
- inspect_ai/_util/text.py +23 -0
- inspect_ai/_view/www/App.css +31 -1
- inspect_ai/_view/www/dist/assets/index.css +31 -1
- inspect_ai/_view/www/dist/assets/index.js +25498 -2044
- inspect_ai/_view/www/log-schema.json +32 -2
- inspect_ai/_view/www/package.json +2 -0
- inspect_ai/_view/www/src/App.mjs +14 -16
- inspect_ai/_view/www/src/Types.mjs +1 -2
- inspect_ai/_view/www/src/api/Types.ts +133 -0
- inspect_ai/_view/www/src/api/{api-browser.mjs → api-browser.ts} +25 -13
- inspect_ai/_view/www/src/api/api-http.ts +219 -0
- inspect_ai/_view/www/src/api/api-shared.ts +47 -0
- inspect_ai/_view/www/src/api/{api-vscode.mjs → api-vscode.ts} +22 -19
- inspect_ai/_view/www/src/api/{client-api.mjs → client-api.ts} +93 -53
- inspect_ai/_view/www/src/api/index.ts +51 -0
- inspect_ai/_view/www/src/api/jsonrpc.ts +225 -0
- inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
- inspect_ai/_view/www/src/components/DownloadButton.mjs +1 -1
- inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
- inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
- inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
- inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
- inspect_ai/_view/www/src/index.js +77 -4
- inspect_ai/_view/www/src/log/{remoteLogFile.mjs → remoteLogFile.ts} +62 -46
- inspect_ai/_view/www/src/navbar/Navbar.mjs +4 -1
- inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +19 -10
- inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
- inspect_ai/_view/www/src/samples/SampleList.mjs +19 -49
- inspect_ai/_view/www/src/samples/SampleScores.mjs +1 -1
- inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +38 -26
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +14 -11
- inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
- inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
- inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +2 -2
- inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
- inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
- inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
- inspect_ai/_view/www/src/types/log.d.ts +13 -2
- inspect_ai/_view/www/src/utils/Format.mjs +10 -3
- inspect_ai/_view/www/src/utils/{Json.mjs → json-worker.ts} +13 -9
- inspect_ai/_view/www/src/utils/vscode.ts +36 -0
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +11 -5
- inspect_ai/_view/www/vite.config.js +7 -0
- inspect_ai/_view/www/yarn.lock +116 -0
- inspect_ai/approval/_human/__init__.py +0 -0
- inspect_ai/approval/_human/manager.py +1 -1
- inspect_ai/approval/_policy.py +12 -6
- inspect_ai/log/_log.py +1 -1
- inspect_ai/log/_samples.py +16 -0
- inspect_ai/log/_transcript.py +4 -1
- inspect_ai/model/_call_tools.py +59 -0
- inspect_ai/model/_conversation.py +16 -7
- inspect_ai/model/_generate_config.py +12 -12
- inspect_ai/model/_model.py +117 -18
- inspect_ai/model/_model_output.py +22 -2
- inspect_ai/model/_openai.py +383 -0
- inspect_ai/model/_providers/anthropic.py +152 -55
- inspect_ai/model/_providers/azureai.py +21 -21
- inspect_ai/model/_providers/bedrock.py +37 -40
- inspect_ai/model/_providers/goodfire.py +248 -0
- inspect_ai/model/_providers/google.py +46 -54
- inspect_ai/model/_providers/groq.py +7 -3
- inspect_ai/model/_providers/hf.py +6 -0
- inspect_ai/model/_providers/mistral.py +13 -12
- inspect_ai/model/_providers/openai.py +51 -218
- inspect_ai/model/_providers/openai_o1.py +11 -12
- inspect_ai/model/_providers/providers.py +23 -1
- inspect_ai/model/_providers/together.py +12 -12
- inspect_ai/model/_providers/util/__init__.py +2 -3
- inspect_ai/model/_providers/util/hf_handler.py +1 -1
- inspect_ai/model/_providers/util/llama31.py +1 -1
- inspect_ai/model/_providers/util/util.py +0 -76
- inspect_ai/model/_providers/vertex.py +1 -4
- inspect_ai/scorer/_metric.py +3 -0
- inspect_ai/scorer/_reducer/reducer.py +1 -1
- inspect_ai/scorer/_scorer.py +4 -3
- inspect_ai/solver/__init__.py +4 -5
- inspect_ai/solver/_basic_agent.py +1 -1
- inspect_ai/solver/_bridge/__init__.py +3 -0
- inspect_ai/solver/_bridge/bridge.py +100 -0
- inspect_ai/solver/_bridge/patch.py +170 -0
- inspect_ai/solver/_prompt.py +35 -5
- inspect_ai/solver/_solver.py +6 -0
- inspect_ai/solver/_task_state.py +80 -38
- inspect_ai/tool/__init__.py +2 -0
- inspect_ai/tool/_tool.py +12 -1
- inspect_ai/tool/_tool_call.py +10 -0
- inspect_ai/tool/_tool_def.py +16 -5
- inspect_ai/tool/_tool_with.py +21 -4
- inspect_ai/tool/beta/__init__.py +5 -0
- inspect_ai/tool/beta/_computer/__init__.py +3 -0
- inspect_ai/tool/beta/_computer/_common.py +133 -0
- inspect_ai/tool/beta/_computer/_computer.py +155 -0
- inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
- inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
- inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
- inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
- inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/util/__init__.py +2 -0
- inspect_ai/util/_display.py +5 -0
- inspect_ai/util/_limit.py +26 -0
- inspect_ai/util/_sandbox/docker/docker.py +64 -1
- inspect_ai/util/_sandbox/docker/internal.py +3 -1
- inspect_ai/util/_sandbox/docker/prereqs.py +1 -1
- inspect_ai/util/_sandbox/environment.py +14 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/METADATA +3 -2
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/RECORD +159 -126
- inspect_ai/_view/www/src/api/Types.mjs +0 -117
- inspect_ai/_view/www/src/api/api-http.mjs +0 -300
- inspect_ai/_view/www/src/api/api-shared.mjs +0 -10
- inspect_ai/_view/www/src/api/index.mjs +0 -49
- inspect_ai/_view/www/src/api/jsonrpc.mjs +0 -208
- inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
- inspect_ai/_view/www/src/utils/vscode.mjs +0 -16
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/top_level.txt +0 -0
inspect_ai/_view/www/yarn.lock
CHANGED
@@ -169,6 +169,70 @@
|
|
169
169
|
"@babel/helper-validator-identifier" "^7.24.7"
|
170
170
|
to-fast-properties "^2.0.0"
|
171
171
|
|
172
|
+
"@codemirror/autocomplete@^6.0.0":
|
173
|
+
version "6.18.3"
|
174
|
+
resolved "https://registry.yarnpkg.com/@codemirror/autocomplete/-/autocomplete-6.18.3.tgz#f9ea79a2f369662516f71bc0b2f819454d3c8e00"
|
175
|
+
integrity sha512-1dNIOmiM0z4BIBwxmxEfA1yoxh1MF/6KPBbh20a5vphGV0ictKlgQsbJs6D6SkR6iJpGbpwRsa6PFMNlg9T9pQ==
|
176
|
+
dependencies:
|
177
|
+
"@codemirror/language" "^6.0.0"
|
178
|
+
"@codemirror/state" "^6.0.0"
|
179
|
+
"@codemirror/view" "^6.17.0"
|
180
|
+
"@lezer/common" "^1.0.0"
|
181
|
+
|
182
|
+
"@codemirror/commands@^6.0.0":
|
183
|
+
version "6.7.1"
|
184
|
+
resolved "https://registry.yarnpkg.com/@codemirror/commands/-/commands-6.7.1.tgz#04561e95bc0779eaa49efd63e916c4efb3bbf6d6"
|
185
|
+
integrity sha512-llTrboQYw5H4THfhN4U3qCnSZ1SOJ60ohhz+SzU0ADGtwlc533DtklQP0vSFaQuCPDn3BPpOd1GbbnUtwNjsrw==
|
186
|
+
dependencies:
|
187
|
+
"@codemirror/language" "^6.0.0"
|
188
|
+
"@codemirror/state" "^6.4.0"
|
189
|
+
"@codemirror/view" "^6.27.0"
|
190
|
+
"@lezer/common" "^1.1.0"
|
191
|
+
|
192
|
+
"@codemirror/language@^6.0.0":
|
193
|
+
version "6.10.6"
|
194
|
+
resolved "https://registry.yarnpkg.com/@codemirror/language/-/language-6.10.6.tgz#3770aa55fce575b45b1037b390b576907f0061c7"
|
195
|
+
integrity sha512-KrsbdCnxEztLVbB5PycWXFxas4EOyk/fPAfruSOnDDppevQgid2XZ+KbJ9u+fDikP/e7MW7HPBTvTb8JlZK9vA==
|
196
|
+
dependencies:
|
197
|
+
"@codemirror/state" "^6.0.0"
|
198
|
+
"@codemirror/view" "^6.23.0"
|
199
|
+
"@lezer/common" "^1.1.0"
|
200
|
+
"@lezer/highlight" "^1.0.0"
|
201
|
+
"@lezer/lr" "^1.0.0"
|
202
|
+
style-mod "^4.0.0"
|
203
|
+
|
204
|
+
"@codemirror/lint@^6.0.0":
|
205
|
+
version "6.8.4"
|
206
|
+
resolved "https://registry.yarnpkg.com/@codemirror/lint/-/lint-6.8.4.tgz#7d8aa5d1a6dec89ffcc23ad45ddca2e12e90982d"
|
207
|
+
integrity sha512-u4q7PnZlJUojeRe8FJa/njJcMctISGgPQ4PnWsd9268R4ZTtU+tfFYmwkBvgcrK2+QQ8tYFVALVb5fVJykKc5A==
|
208
|
+
dependencies:
|
209
|
+
"@codemirror/state" "^6.0.0"
|
210
|
+
"@codemirror/view" "^6.35.0"
|
211
|
+
crelt "^1.0.5"
|
212
|
+
|
213
|
+
"@codemirror/search@^6.0.0":
|
214
|
+
version "6.5.8"
|
215
|
+
resolved "https://registry.yarnpkg.com/@codemirror/search/-/search-6.5.8.tgz#b59b3659b46184cc75d6108d7c050a4ca344c3a0"
|
216
|
+
integrity sha512-PoWtZvo7c1XFeZWmmyaOp2G0XVbOnm+fJzvghqGAktBW3cufwJUWvSCcNG0ppXiBEM05mZu6RhMtXPv2hpllig==
|
217
|
+
dependencies:
|
218
|
+
"@codemirror/state" "^6.0.0"
|
219
|
+
"@codemirror/view" "^6.0.0"
|
220
|
+
crelt "^1.0.5"
|
221
|
+
|
222
|
+
"@codemirror/state@^6.0.0", "@codemirror/state@^6.4.0":
|
223
|
+
version "6.4.1"
|
224
|
+
resolved "https://registry.yarnpkg.com/@codemirror/state/-/state-6.4.1.tgz#da57143695c056d9a3c38705ed34136e2b68171b"
|
225
|
+
integrity sha512-QkEyUiLhsJoZkbumGZlswmAhA7CBU02Wrz7zvH4SrcifbsqwlXShVXg65f3v/ts57W3dqyamEriMhij1Z3Zz4A==
|
226
|
+
|
227
|
+
"@codemirror/view@^6.0.0", "@codemirror/view@^6.17.0", "@codemirror/view@^6.23.0", "@codemirror/view@^6.27.0", "@codemirror/view@^6.35.0":
|
228
|
+
version "6.35.0"
|
229
|
+
resolved "https://registry.yarnpkg.com/@codemirror/view/-/view-6.35.0.tgz#890e8e31a58edf65cdf193049fe9f3fdec20cc82"
|
230
|
+
integrity sha512-I0tYy63q5XkaWsJ8QRv5h6ves7kvtrBWjBcnf/bzohFJQc5c14a1AQRdE8QpPF9eMp5Mq2FMm59TCj1gDfE7kw==
|
231
|
+
dependencies:
|
232
|
+
"@codemirror/state" "^6.4.0"
|
233
|
+
style-mod "^4.1.0"
|
234
|
+
w3c-keyname "^2.2.4"
|
235
|
+
|
172
236
|
"@esbuild/aix-ppc64@0.21.5":
|
173
237
|
version "0.21.5"
|
174
238
|
resolved "https://registry.yarnpkg.com/@esbuild/aix-ppc64/-/aix-ppc64-0.21.5.tgz#c7184a326533fcdf1b8ee0733e21c713b975575f"
|
@@ -372,6 +436,25 @@
|
|
372
436
|
"@jridgewell/resolve-uri" "^3.1.0"
|
373
437
|
"@jridgewell/sourcemap-codec" "^1.4.14"
|
374
438
|
|
439
|
+
"@lezer/common@^1.0.0", "@lezer/common@^1.1.0":
|
440
|
+
version "1.2.3"
|
441
|
+
resolved "https://registry.yarnpkg.com/@lezer/common/-/common-1.2.3.tgz#138fcddab157d83da557554851017c6c1e5667fd"
|
442
|
+
integrity sha512-w7ojc8ejBqr2REPsWxJjrMFsA/ysDCFICn8zEOR9mrqzOu2amhITYuLD8ag6XZf0CFXDrhKqw7+tW8cX66NaDA==
|
443
|
+
|
444
|
+
"@lezer/highlight@^1.0.0":
|
445
|
+
version "1.2.1"
|
446
|
+
resolved "https://registry.yarnpkg.com/@lezer/highlight/-/highlight-1.2.1.tgz#596fa8f9aeb58a608be0a563e960c373cbf23f8b"
|
447
|
+
integrity sha512-Z5duk4RN/3zuVO7Jq0pGLJ3qynpxUVsh7IbUbGj88+uV2ApSAn6kWg2au3iJb+0Zi7kKtqffIESgNcRXWZWmSA==
|
448
|
+
dependencies:
|
449
|
+
"@lezer/common" "^1.0.0"
|
450
|
+
|
451
|
+
"@lezer/lr@^1.0.0":
|
452
|
+
version "1.4.2"
|
453
|
+
resolved "https://registry.yarnpkg.com/@lezer/lr/-/lr-1.4.2.tgz#931ea3dea8e9de84e90781001dae30dea9ff1727"
|
454
|
+
integrity sha512-pu0K1jCIdnQ12aWNaAVU5bzi7Bd1w54J3ECgANPmYLtQKP0HBj2cE/5coBD66MT10xbtIuUr7tg0Shbsvk0mDA==
|
455
|
+
dependencies:
|
456
|
+
"@lezer/common" "^1.0.0"
|
457
|
+
|
375
458
|
"@nodelib/fs.scandir@2.1.5":
|
376
459
|
version "2.1.5"
|
377
460
|
resolved "https://registry.yarnpkg.com/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz#7619c2eb21b25483f6d167548b4cfd5a7488c3d5"
|
@@ -619,6 +702,19 @@ clipboard@^2.0.11:
|
|
619
702
|
select "^1.1.2"
|
620
703
|
tiny-emitter "^2.0.0"
|
621
704
|
|
705
|
+
codemirror@^6.0.1:
|
706
|
+
version "6.0.1"
|
707
|
+
resolved "https://registry.yarnpkg.com/codemirror/-/codemirror-6.0.1.tgz#62b91142d45904547ee3e0e0e4c1a79158035a29"
|
708
|
+
integrity sha512-J8j+nZ+CdWmIeFIGXEFbFPtpiYacFMDR8GlHK3IyHQJMCaVRfGx9NT+Hxivv1ckLWPvNdZqndbr/7lVhrf/Svg==
|
709
|
+
dependencies:
|
710
|
+
"@codemirror/autocomplete" "^6.0.0"
|
711
|
+
"@codemirror/commands" "^6.0.0"
|
712
|
+
"@codemirror/language" "^6.0.0"
|
713
|
+
"@codemirror/lint" "^6.0.0"
|
714
|
+
"@codemirror/search" "^6.0.0"
|
715
|
+
"@codemirror/state" "^6.0.0"
|
716
|
+
"@codemirror/view" "^6.0.0"
|
717
|
+
|
622
718
|
color-convert@^1.9.0:
|
623
719
|
version "1.9.3"
|
624
720
|
resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-1.9.3.tgz#bb71850690e1f136567de629d2d5471deda4c1e8"
|
@@ -653,6 +749,11 @@ convert-source-map@^2.0.0:
|
|
653
749
|
resolved "https://registry.yarnpkg.com/convert-source-map/-/convert-source-map-2.0.0.tgz#4b560f649fc4e918dd0ab75cf4961e8bc882d82a"
|
654
750
|
integrity sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==
|
655
751
|
|
752
|
+
crelt@^1.0.5:
|
753
|
+
version "1.0.6"
|
754
|
+
resolved "https://registry.yarnpkg.com/crelt/-/crelt-1.0.6.tgz#7cc898ea74e190fb6ef9dae57f8f81cf7302df72"
|
755
|
+
integrity sha512-VQ2MBenTq1fWZUH9DJNGti7kKv6EeAuYr3cLwxUWhIu1baTaXh4Ib5W2CqHVqib4/MqbYGJqiL3Zb8GJZr3l4g==
|
756
|
+
|
656
757
|
cross-spawn@^7.0.2:
|
657
758
|
version "7.0.3"
|
658
759
|
resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6"
|
@@ -885,6 +986,11 @@ file-entry-cache@^8.0.0:
|
|
885
986
|
dependencies:
|
886
987
|
flat-cache "^4.0.0"
|
887
988
|
|
989
|
+
filtrex@^3.1.0:
|
990
|
+
version "3.1.0"
|
991
|
+
resolved "https://registry.yarnpkg.com/filtrex/-/filtrex-3.1.0.tgz#5ec00994615ff10e5e09c89bb290c855cb408c21"
|
992
|
+
integrity sha512-mHzZ2wUISETF1OaEcNRiGz1ljuIV8c/C9td9qyAZ+wTwigkAk5RO9YrCxQKk5H9v7joDRFIBik9U5RTK9eXZ/A==
|
993
|
+
|
888
994
|
find-up@^5.0.0:
|
889
995
|
version "5.0.0"
|
890
996
|
resolved "https://registry.yarnpkg.com/find-up/-/find-up-5.0.0.tgz#4c92819ecb7083561e4f4a240a86be5198f536fc"
|
@@ -1367,6 +1473,11 @@ strip-json-comments@^3.1.1:
|
|
1367
1473
|
resolved "https://registry.yarnpkg.com/strip-json-comments/-/strip-json-comments-3.1.1.tgz#31f1281b3832630434831c310c01cccda8cbe006"
|
1368
1474
|
integrity sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==
|
1369
1475
|
|
1476
|
+
style-mod@^4.0.0, style-mod@^4.1.0:
|
1477
|
+
version "4.1.2"
|
1478
|
+
resolved "https://registry.yarnpkg.com/style-mod/-/style-mod-4.1.2.tgz#ca238a1ad4786520f7515a8539d5a63691d7bf67"
|
1479
|
+
integrity sha512-wnD1HyVqpJUI2+eKZ+eo1UwghftP6yuFheBqqe+bWCotBjC2K1YnteJILRMs3SM4V/0dLEW1SC27MWP5y+mwmw==
|
1480
|
+
|
1370
1481
|
supports-color@^5.3.0:
|
1371
1482
|
version "5.5.0"
|
1372
1483
|
resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-5.5.0.tgz#e2e69a44ac8772f78a1ec0b35b689df6530efc8f"
|
@@ -1442,6 +1553,11 @@ vite@^5.3.2:
|
|
1442
1553
|
optionalDependencies:
|
1443
1554
|
fsevents "~2.3.3"
|
1444
1555
|
|
1556
|
+
w3c-keyname@^2.2.4:
|
1557
|
+
version "2.2.8"
|
1558
|
+
resolved "https://registry.yarnpkg.com/w3c-keyname/-/w3c-keyname-2.2.8.tgz#7b17c8c6883d4e8b86ac8aba79d39e880f8869c5"
|
1559
|
+
integrity sha512-dpojBhNsCNN7T82Tm7k26A6G9ML3NkhDsnw9n/eoxSRlVBB4CEtIQ/KTCLI2Fwf3ataSXRhYFkQi3SlnFwPvPQ==
|
1560
|
+
|
1445
1561
|
which@^2.0.1:
|
1446
1562
|
version "2.0.2"
|
1447
1563
|
resolved "https://registry.yarnpkg.com/which/-/which-2.0.2.tgz#7c6a8dd0a636a0327e10b59c9286eee93f3f51b1"
|
File without changes
|
@@ -40,7 +40,7 @@ class HumanApprovalManager:
|
|
40
40
|
future = cast(Future[Approval], asyncio.get_event_loop().create_future())
|
41
41
|
sample = sample_active()
|
42
42
|
assert sample
|
43
|
-
assert sample.sample.id
|
43
|
+
assert sample.sample.id is not None
|
44
44
|
pending = PendingApprovalRequest(
|
45
45
|
request=request,
|
46
46
|
task=sample.task,
|
inspect_ai/approval/_policy.py
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
import fnmatch
|
2
|
-
import
|
2
|
+
import sys
|
3
3
|
from dataclasses import dataclass
|
4
4
|
from pathlib import Path
|
5
|
-
from re import Pattern
|
6
5
|
from typing import Any, Generator, cast
|
7
6
|
|
8
7
|
from pydantic import BaseModel, Field, model_validator
|
9
8
|
|
10
9
|
from inspect_ai._util.config import read_config_object
|
10
|
+
from inspect_ai._util.format import format_function_call
|
11
11
|
from inspect_ai._util.registry import registry_create, registry_lookup
|
12
12
|
from inspect_ai.solver._task_state import TaskState
|
13
13
|
from inspect_ai.tool._tool_call import ToolCall, ToolCallView
|
@@ -30,17 +30,23 @@ def policy_approver(policies: str | list[ApprovalPolicy]) -> Approver:
|
|
30
30
|
policies = approval_policies_from_config(policies)
|
31
31
|
|
32
32
|
# compile policy into approvers and regexes for matching
|
33
|
-
policy_matchers: list[tuple[list[
|
33
|
+
policy_matchers: list[tuple[list[str], Approver]] = []
|
34
34
|
for policy in policies:
|
35
35
|
tools = [policy.tools] if isinstance(policy.tools, str) else policy.tools
|
36
|
-
|
37
|
-
policy_matchers.append((
|
36
|
+
globs = [f"{tool}*" for tool in tools]
|
37
|
+
policy_matchers.append((globs, policy.approver))
|
38
38
|
|
39
39
|
# generator for policies that match a tool_call
|
40
40
|
def tool_approvers(tool_call: ToolCall) -> Generator[Approver, None, None]:
|
41
41
|
for policy_matcher in iter(policy_matchers):
|
42
|
+
function_call = format_function_call(
|
43
|
+
tool_call.function, tool_call.arguments, width=sys.maxsize
|
44
|
+
)
|
42
45
|
if any(
|
43
|
-
[
|
46
|
+
[
|
47
|
+
fnmatch.fnmatch(function_call, pattern)
|
48
|
+
for pattern in policy_matcher[0]
|
49
|
+
]
|
44
50
|
):
|
45
51
|
yield policy_matcher[1]
|
46
52
|
|
inspect_ai/log/_log.py
CHANGED
@@ -114,7 +114,7 @@ class EvalConfig(BaseModel):
|
|
114
114
|
|
115
115
|
|
116
116
|
class EvalSampleLimit(BaseModel):
|
117
|
-
type: Literal["context", "time", "message", "token", "operator"]
|
117
|
+
type: Literal["context", "time", "message", "token", "operator", "custom"]
|
118
118
|
"""The type of limit"""
|
119
119
|
|
120
120
|
limit: int
|
inspect_ai/log/_samples.py
CHANGED
@@ -113,6 +113,14 @@ def sample_active() -> ActiveSample | None:
|
|
113
113
|
return _sample_active.get(None)
|
114
114
|
|
115
115
|
|
116
|
+
def active_sample_token_limit() -> int | None:
|
117
|
+
active = sample_active()
|
118
|
+
if active:
|
119
|
+
return active.token_limit
|
120
|
+
else:
|
121
|
+
return None
|
122
|
+
|
123
|
+
|
116
124
|
def set_active_sample_token_limit(token_limit: int | None) -> None:
|
117
125
|
active = sample_active()
|
118
126
|
if active:
|
@@ -125,6 +133,14 @@ def set_active_sample_total_tokens(total_tokens: int) -> None:
|
|
125
133
|
active.total_tokens = total_tokens
|
126
134
|
|
127
135
|
|
136
|
+
def active_sample_message_limit() -> int | None:
|
137
|
+
active = sample_active()
|
138
|
+
if active:
|
139
|
+
return active.message_limit
|
140
|
+
else:
|
141
|
+
return None
|
142
|
+
|
143
|
+
|
128
144
|
def set_active_sample_message_limit(message_limit: int | None) -> None:
|
129
145
|
active = sample_active()
|
130
146
|
if active:
|
inspect_ai/log/_transcript.py
CHANGED
@@ -70,7 +70,7 @@ class SampleLimitEvent(BaseEvent):
|
|
70
70
|
event: Literal["sample_limit"] = Field(default="sample_limit")
|
71
71
|
"""Event type."""
|
72
72
|
|
73
|
-
type: Literal["message", "time", "token", "operator"]
|
73
|
+
type: Literal["message", "time", "token", "operator", "custom"]
|
74
74
|
"""Type of limit that halted processing"""
|
75
75
|
|
76
76
|
message: str
|
@@ -124,6 +124,9 @@ class ModelEvent(BaseEvent):
|
|
124
124
|
output: ModelOutput
|
125
125
|
"""Output from model."""
|
126
126
|
|
127
|
+
error: str | None = Field(default=None)
|
128
|
+
"""Error which occurred during model call."""
|
129
|
+
|
127
130
|
cache: Literal["read", "write"] | None = Field(default=None)
|
128
131
|
"""Was this a cache read or write."""
|
129
132
|
|
inspect_ai/model/_call_tools.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
import asyncio
|
2
2
|
import inspect
|
3
|
+
import json
|
3
4
|
import types
|
4
5
|
from dataclasses import is_dataclass
|
5
6
|
from logging import getLogger
|
@@ -21,6 +22,7 @@ from typing import (
|
|
21
22
|
is_typeddict,
|
22
23
|
)
|
23
24
|
|
25
|
+
import yaml
|
24
26
|
from jsonschema import Draft7Validator
|
25
27
|
from pydantic import BaseModel
|
26
28
|
|
@@ -328,6 +330,10 @@ def tool_params(input: dict[str, Any], func: Callable[..., Any]) -> dict[str, An
|
|
328
330
|
type_hints = get_type_hints(func)
|
329
331
|
docstring = inspect.getdoc(func)
|
330
332
|
|
333
|
+
# if the function takes **kwargs: Any then just pass the tool arguments through
|
334
|
+
if "kwargs" in type_hints and type_hints["kwargs"] == Any:
|
335
|
+
return input
|
336
|
+
|
331
337
|
# build params
|
332
338
|
params: dict[str, Any] = {}
|
333
339
|
for param_name, param in signature.parameters.items():
|
@@ -465,3 +471,56 @@ def truncate_tool_output(
|
|
465
471
|
)
|
466
472
|
else:
|
467
473
|
return None
|
474
|
+
|
475
|
+
|
476
|
+
def tool_parse_error_message(arguments: str, ex: Exception) -> str:
|
477
|
+
return f"Error parsing the following tool call arguments:\n\n{arguments}\n\nError details: {ex}"
|
478
|
+
|
479
|
+
|
480
|
+
def parse_tool_call(
|
481
|
+
id: str, function: str, arguments: str, tools: list[ToolInfo] | None = None
|
482
|
+
) -> ToolCall:
|
483
|
+
error: str | None = None
|
484
|
+
arguments_dict: dict[str, Any] = {}
|
485
|
+
|
486
|
+
def report_parse_error(ex: Exception) -> None:
|
487
|
+
nonlocal error
|
488
|
+
error = tool_parse_error_message(arguments, ex)
|
489
|
+
logger.info(error)
|
490
|
+
|
491
|
+
# if the arguments is a dict, then handle it with a plain json.loads
|
492
|
+
arguments = arguments.strip()
|
493
|
+
if arguments.startswith("{"):
|
494
|
+
try:
|
495
|
+
arguments_dict = json.loads(arguments)
|
496
|
+
except json.JSONDecodeError as ex:
|
497
|
+
report_parse_error(ex)
|
498
|
+
|
499
|
+
# otherwise parse it as yaml (which will pickup unquoted strings, numbers, and true/false)
|
500
|
+
# and then create a dict that maps it to the first function argument
|
501
|
+
elif function and tools:
|
502
|
+
tool_info = next(
|
503
|
+
(
|
504
|
+
tool
|
505
|
+
for tool in tools
|
506
|
+
if tool.name == function and len(tool.parameters.properties) > 0
|
507
|
+
),
|
508
|
+
None,
|
509
|
+
)
|
510
|
+
if tool_info:
|
511
|
+
param_names = list(tool_info.parameters.properties.keys())
|
512
|
+
try:
|
513
|
+
value = yaml.safe_load(arguments)
|
514
|
+
arguments_dict[param_names[0]] = value
|
515
|
+
except yaml.error.YAMLError:
|
516
|
+
# If the yaml parser fails, we treat it as a string argument.
|
517
|
+
arguments_dict[param_names[0]] = arguments
|
518
|
+
|
519
|
+
# return ToolCall with error payload
|
520
|
+
return ToolCall(
|
521
|
+
id=id,
|
522
|
+
function=function,
|
523
|
+
arguments=arguments_dict,
|
524
|
+
type="function",
|
525
|
+
parse_error=error,
|
526
|
+
)
|
@@ -15,13 +15,16 @@ MESSAGE_TITLE = "Message"
|
|
15
15
|
def conversation_tool_mesage(message: ChatMessageTool) -> None:
|
16
16
|
if display_type() == "conversation":
|
17
17
|
# truncate output to 100 lines
|
18
|
-
output =
|
19
|
-
|
20
|
-
|
21
|
-
conversation_panel(
|
22
|
-
title=f"Tool Output: {message.function}",
|
23
|
-
content=content,
|
18
|
+
output = (
|
19
|
+
message.error.message.strip() if message.error else message.text.strip()
|
24
20
|
)
|
21
|
+
if output:
|
22
|
+
content = lines_display(output, 100)
|
23
|
+
|
24
|
+
conversation_panel(
|
25
|
+
title=f"Tool Output: {message.function}",
|
26
|
+
content=content,
|
27
|
+
)
|
25
28
|
|
26
29
|
|
27
30
|
def conversation_assistant_message(
|
@@ -42,8 +45,14 @@ def conversation_assistant_message(
|
|
42
45
|
|
43
46
|
# print tool calls
|
44
47
|
if message.tool_calls:
|
45
|
-
content
|
48
|
+
if content:
|
49
|
+
content.append(Text())
|
46
50
|
content.extend(render_tool_calls(message.tool_calls))
|
47
51
|
|
48
52
|
# print the assistant message
|
49
53
|
conversation_panel(title="Assistant", content=content)
|
54
|
+
|
55
|
+
|
56
|
+
def conversation_assistant_error(error: Exception) -> None:
|
57
|
+
if display_type() == "conversation":
|
58
|
+
conversation_panel(title="Assistant", content=repr(error))
|
@@ -34,7 +34,7 @@ class GenerateConfigArgs(TypedDict, total=False):
|
|
34
34
|
"""Sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence."""
|
35
35
|
|
36
36
|
best_of: int | None
|
37
|
-
"""Generates best_of completions server-side and returns the 'best' (the one with the highest log probability per token).
|
37
|
+
"""Generates best_of completions server-side and returns the 'best' (the one with the highest log probability per token). vLLM only."""
|
38
38
|
|
39
39
|
frequency_penalty: float | None
|
40
40
|
"""Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. OpenAI, Google, Grok, Groq, and vLLM only."""
|
@@ -48,9 +48,6 @@ class GenerateConfigArgs(TypedDict, total=False):
|
|
48
48
|
seed: int | None
|
49
49
|
"""Random seed. OpenAI, Google, Mistral, Groq, HuggingFace, and vLLM only."""
|
50
50
|
|
51
|
-
suffix: str | None
|
52
|
-
"""The suffix that comes after a completion of inserted text. OpenAI only."""
|
53
|
-
|
54
51
|
top_k: int | None
|
55
52
|
"""Randomly sample the next word from the top_k most likely next words. Anthropic, Google, and HuggingFace only."""
|
56
53
|
|
@@ -58,14 +55,17 @@ class GenerateConfigArgs(TypedDict, total=False):
|
|
58
55
|
"""How many chat completion choices to generate for each input message. OpenAI, Grok, Google, and TogetherAI only."""
|
59
56
|
|
60
57
|
logprobs: bool | None
|
61
|
-
"""Return log probabilities of the output tokens. OpenAI,
|
58
|
+
"""Return log probabilities of the output tokens. OpenAI, Grok, TogetherAI, Huggingface, llama-cpp-python, and vLLM only."""
|
62
59
|
|
63
60
|
top_logprobs: int | None
|
64
|
-
"""Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI,
|
61
|
+
"""Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, Grok, and Huggingface only."""
|
65
62
|
|
66
63
|
parallel_tool_calls: bool | None
|
67
64
|
"""Whether to enable parallel function calling during tool use (defaults to True). OpenAI and Groq only."""
|
68
65
|
|
66
|
+
internal_tools: bool | None
|
67
|
+
"""Whether to automatically map tools to model internal implementations (e.g. 'computer' for anthropic)."""
|
68
|
+
|
69
69
|
max_tool_output: int | None
|
70
70
|
"""Maximum tool output (in bytes). Defaults to 16 * 1024."""
|
71
71
|
|
@@ -104,7 +104,7 @@ class GenerateConfig(BaseModel):
|
|
104
104
|
"""Sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence."""
|
105
105
|
|
106
106
|
best_of: int | None = Field(default=None)
|
107
|
-
"""Generates best_of completions server-side and returns the 'best' (the one with the highest log probability per token).
|
107
|
+
"""Generates best_of completions server-side and returns the 'best' (the one with the highest log probability per token). vLLM only."""
|
108
108
|
|
109
109
|
frequency_penalty: float | None = Field(default=None)
|
110
110
|
"""Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. OpenAI, Google, Grok, Groq, and vLLM only."""
|
@@ -118,9 +118,6 @@ class GenerateConfig(BaseModel):
|
|
118
118
|
seed: int | None = Field(default=None)
|
119
119
|
"""Random seed. OpenAI, Google, Mistral, Groq, HuggingFace, and vLLM only."""
|
120
120
|
|
121
|
-
suffix: str | None = Field(default=None)
|
122
|
-
"""The suffix that comes after a completion of inserted text. OpenAI only."""
|
123
|
-
|
124
121
|
top_k: int | None = Field(default=None)
|
125
122
|
"""Randomly sample the next word from the top_k most likely next words. Anthropic, Google, HuggingFace, and vLLM only."""
|
126
123
|
|
@@ -128,14 +125,17 @@ class GenerateConfig(BaseModel):
|
|
128
125
|
"""How many chat completion choices to generate for each input message. OpenAI, Grok, Google, TogetherAI, and vLLM only."""
|
129
126
|
|
130
127
|
logprobs: bool | None = Field(default=None)
|
131
|
-
"""Return log probabilities of the output tokens. OpenAI,
|
128
|
+
"""Return log probabilities of the output tokens. OpenAI, Grok, TogetherAI, Huggingface, llama-cpp-python, and vLLM only."""
|
132
129
|
|
133
130
|
top_logprobs: int | None = Field(default=None)
|
134
|
-
"""Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI,
|
131
|
+
"""Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, Grok, Huggingface, and vLLM only."""
|
135
132
|
|
136
133
|
parallel_tool_calls: bool | None = Field(default=None)
|
137
134
|
"""Whether to enable parallel function calling during tool use (defaults to True). OpenAI and Groq only."""
|
138
135
|
|
136
|
+
internal_tools: bool | None = Field(default=None)
|
137
|
+
"""Whether to automatically map tools to model internal implementations (e.g. 'computer' for anthropic)."""
|
138
|
+
|
139
139
|
max_tool_output: int | None = Field(default=None)
|
140
140
|
"""Maximum tool output (in bytes). Defaults to 16 * 1024."""
|
141
141
|
|