inspect-ai 0.3.58__py3-none-any.whl → 0.3.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. inspect_ai/_cli/common.py +3 -1
  2. inspect_ai/_cli/eval.py +15 -9
  3. inspect_ai/_display/core/active.py +4 -1
  4. inspect_ai/_display/core/config.py +3 -3
  5. inspect_ai/_display/core/panel.py +7 -3
  6. inspect_ai/_display/plain/__init__.py +0 -0
  7. inspect_ai/_display/plain/display.py +203 -0
  8. inspect_ai/_display/rich/display.py +0 -5
  9. inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
  10. inspect_ai/_display/textual/widgets/samples.py +79 -12
  11. inspect_ai/_display/textual/widgets/sandbox.py +37 -0
  12. inspect_ai/_eval/eval.py +10 -1
  13. inspect_ai/_eval/loader.py +79 -19
  14. inspect_ai/_eval/registry.py +6 -0
  15. inspect_ai/_eval/score.py +3 -1
  16. inspect_ai/_eval/task/results.py +51 -22
  17. inspect_ai/_eval/task/run.py +47 -13
  18. inspect_ai/_eval/task/sandbox.py +10 -5
  19. inspect_ai/_util/constants.py +1 -0
  20. inspect_ai/_util/port_names.py +61 -0
  21. inspect_ai/_util/text.py +23 -0
  22. inspect_ai/_view/www/App.css +31 -1
  23. inspect_ai/_view/www/dist/assets/index.css +31 -1
  24. inspect_ai/_view/www/dist/assets/index.js +25498 -2044
  25. inspect_ai/_view/www/log-schema.json +32 -2
  26. inspect_ai/_view/www/package.json +2 -0
  27. inspect_ai/_view/www/src/App.mjs +14 -16
  28. inspect_ai/_view/www/src/Types.mjs +1 -2
  29. inspect_ai/_view/www/src/api/Types.ts +133 -0
  30. inspect_ai/_view/www/src/api/{api-browser.mjs → api-browser.ts} +25 -13
  31. inspect_ai/_view/www/src/api/api-http.ts +219 -0
  32. inspect_ai/_view/www/src/api/api-shared.ts +47 -0
  33. inspect_ai/_view/www/src/api/{api-vscode.mjs → api-vscode.ts} +22 -19
  34. inspect_ai/_view/www/src/api/{client-api.mjs → client-api.ts} +93 -53
  35. inspect_ai/_view/www/src/api/index.ts +51 -0
  36. inspect_ai/_view/www/src/api/jsonrpc.ts +225 -0
  37. inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
  38. inspect_ai/_view/www/src/components/DownloadButton.mjs +1 -1
  39. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
  40. inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
  41. inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
  42. inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
  43. inspect_ai/_view/www/src/index.js +77 -4
  44. inspect_ai/_view/www/src/log/{remoteLogFile.mjs → remoteLogFile.ts} +62 -46
  45. inspect_ai/_view/www/src/navbar/Navbar.mjs +4 -1
  46. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +19 -10
  47. inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
  48. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
  49. inspect_ai/_view/www/src/samples/SampleList.mjs +19 -49
  50. inspect_ai/_view/www/src/samples/SampleScores.mjs +1 -1
  51. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
  52. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +38 -26
  53. inspect_ai/_view/www/src/samples/SamplesTab.mjs +14 -11
  54. inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
  55. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
  56. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +2 -2
  57. inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
  58. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
  59. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
  60. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
  61. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
  62. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
  63. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
  64. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
  65. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
  66. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
  67. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
  68. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
  69. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
  70. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
  71. inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
  72. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
  73. inspect_ai/_view/www/src/types/log.d.ts +13 -2
  74. inspect_ai/_view/www/src/utils/Format.mjs +10 -3
  75. inspect_ai/_view/www/src/utils/{Json.mjs → json-worker.ts} +13 -9
  76. inspect_ai/_view/www/src/utils/vscode.ts +36 -0
  77. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +11 -5
  78. inspect_ai/_view/www/vite.config.js +7 -0
  79. inspect_ai/_view/www/yarn.lock +116 -0
  80. inspect_ai/approval/_human/__init__.py +0 -0
  81. inspect_ai/approval/_human/manager.py +1 -1
  82. inspect_ai/approval/_policy.py +12 -6
  83. inspect_ai/log/_log.py +1 -1
  84. inspect_ai/log/_samples.py +16 -0
  85. inspect_ai/log/_transcript.py +4 -1
  86. inspect_ai/model/_call_tools.py +59 -0
  87. inspect_ai/model/_conversation.py +16 -7
  88. inspect_ai/model/_generate_config.py +12 -12
  89. inspect_ai/model/_model.py +117 -18
  90. inspect_ai/model/_model_output.py +22 -2
  91. inspect_ai/model/_openai.py +383 -0
  92. inspect_ai/model/_providers/anthropic.py +152 -55
  93. inspect_ai/model/_providers/azureai.py +21 -21
  94. inspect_ai/model/_providers/bedrock.py +37 -40
  95. inspect_ai/model/_providers/goodfire.py +248 -0
  96. inspect_ai/model/_providers/google.py +46 -54
  97. inspect_ai/model/_providers/groq.py +7 -3
  98. inspect_ai/model/_providers/hf.py +6 -0
  99. inspect_ai/model/_providers/mistral.py +13 -12
  100. inspect_ai/model/_providers/openai.py +51 -218
  101. inspect_ai/model/_providers/openai_o1.py +11 -12
  102. inspect_ai/model/_providers/providers.py +23 -1
  103. inspect_ai/model/_providers/together.py +12 -12
  104. inspect_ai/model/_providers/util/__init__.py +2 -3
  105. inspect_ai/model/_providers/util/hf_handler.py +1 -1
  106. inspect_ai/model/_providers/util/llama31.py +1 -1
  107. inspect_ai/model/_providers/util/util.py +0 -76
  108. inspect_ai/model/_providers/vertex.py +1 -4
  109. inspect_ai/scorer/_metric.py +3 -0
  110. inspect_ai/scorer/_reducer/reducer.py +1 -1
  111. inspect_ai/scorer/_scorer.py +4 -3
  112. inspect_ai/solver/__init__.py +4 -5
  113. inspect_ai/solver/_basic_agent.py +1 -1
  114. inspect_ai/solver/_bridge/__init__.py +3 -0
  115. inspect_ai/solver/_bridge/bridge.py +100 -0
  116. inspect_ai/solver/_bridge/patch.py +170 -0
  117. inspect_ai/solver/_prompt.py +35 -5
  118. inspect_ai/solver/_solver.py +6 -0
  119. inspect_ai/solver/_task_state.py +80 -38
  120. inspect_ai/tool/__init__.py +2 -0
  121. inspect_ai/tool/_tool.py +12 -1
  122. inspect_ai/tool/_tool_call.py +10 -0
  123. inspect_ai/tool/_tool_def.py +16 -5
  124. inspect_ai/tool/_tool_with.py +21 -4
  125. inspect_ai/tool/beta/__init__.py +5 -0
  126. inspect_ai/tool/beta/_computer/__init__.py +3 -0
  127. inspect_ai/tool/beta/_computer/_common.py +133 -0
  128. inspect_ai/tool/beta/_computer/_computer.py +155 -0
  129. inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
  130. inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
  131. inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
  132. inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
  133. inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
  134. inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
  135. inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
  136. inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
  137. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
  138. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
  139. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
  140. inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
  141. inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
  142. inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
  143. inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
  144. inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
  145. inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
  146. inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
  147. inspect_ai/util/__init__.py +2 -0
  148. inspect_ai/util/_display.py +5 -0
  149. inspect_ai/util/_limit.py +26 -0
  150. inspect_ai/util/_sandbox/docker/docker.py +64 -1
  151. inspect_ai/util/_sandbox/docker/internal.py +3 -1
  152. inspect_ai/util/_sandbox/docker/prereqs.py +1 -1
  153. inspect_ai/util/_sandbox/environment.py +14 -0
  154. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/METADATA +3 -2
  155. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/RECORD +159 -126
  156. inspect_ai/_view/www/src/api/Types.mjs +0 -117
  157. inspect_ai/_view/www/src/api/api-http.mjs +0 -300
  158. inspect_ai/_view/www/src/api/api-shared.mjs +0 -10
  159. inspect_ai/_view/www/src/api/index.mjs +0 -49
  160. inspect_ai/_view/www/src/api/jsonrpc.mjs +0 -208
  161. inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
  162. inspect_ai/_view/www/src/utils/vscode.mjs +0 -16
  163. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/LICENSE +0 -0
  164. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/WHEEL +0 -0
  165. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/entry_points.txt +0 -0
  166. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/top_level.txt +0 -0
@@ -169,6 +169,70 @@
169
169
  "@babel/helper-validator-identifier" "^7.24.7"
170
170
  to-fast-properties "^2.0.0"
171
171
 
172
+ "@codemirror/autocomplete@^6.0.0":
173
+ version "6.18.3"
174
+ resolved "https://registry.yarnpkg.com/@codemirror/autocomplete/-/autocomplete-6.18.3.tgz#f9ea79a2f369662516f71bc0b2f819454d3c8e00"
175
+ integrity sha512-1dNIOmiM0z4BIBwxmxEfA1yoxh1MF/6KPBbh20a5vphGV0ictKlgQsbJs6D6SkR6iJpGbpwRsa6PFMNlg9T9pQ==
176
+ dependencies:
177
+ "@codemirror/language" "^6.0.0"
178
+ "@codemirror/state" "^6.0.0"
179
+ "@codemirror/view" "^6.17.0"
180
+ "@lezer/common" "^1.0.0"
181
+
182
+ "@codemirror/commands@^6.0.0":
183
+ version "6.7.1"
184
+ resolved "https://registry.yarnpkg.com/@codemirror/commands/-/commands-6.7.1.tgz#04561e95bc0779eaa49efd63e916c4efb3bbf6d6"
185
+ integrity sha512-llTrboQYw5H4THfhN4U3qCnSZ1SOJ60ohhz+SzU0ADGtwlc533DtklQP0vSFaQuCPDn3BPpOd1GbbnUtwNjsrw==
186
+ dependencies:
187
+ "@codemirror/language" "^6.0.0"
188
+ "@codemirror/state" "^6.4.0"
189
+ "@codemirror/view" "^6.27.0"
190
+ "@lezer/common" "^1.1.0"
191
+
192
+ "@codemirror/language@^6.0.0":
193
+ version "6.10.6"
194
+ resolved "https://registry.yarnpkg.com/@codemirror/language/-/language-6.10.6.tgz#3770aa55fce575b45b1037b390b576907f0061c7"
195
+ integrity sha512-KrsbdCnxEztLVbB5PycWXFxas4EOyk/fPAfruSOnDDppevQgid2XZ+KbJ9u+fDikP/e7MW7HPBTvTb8JlZK9vA==
196
+ dependencies:
197
+ "@codemirror/state" "^6.0.0"
198
+ "@codemirror/view" "^6.23.0"
199
+ "@lezer/common" "^1.1.0"
200
+ "@lezer/highlight" "^1.0.0"
201
+ "@lezer/lr" "^1.0.0"
202
+ style-mod "^4.0.0"
203
+
204
+ "@codemirror/lint@^6.0.0":
205
+ version "6.8.4"
206
+ resolved "https://registry.yarnpkg.com/@codemirror/lint/-/lint-6.8.4.tgz#7d8aa5d1a6dec89ffcc23ad45ddca2e12e90982d"
207
+ integrity sha512-u4q7PnZlJUojeRe8FJa/njJcMctISGgPQ4PnWsd9268R4ZTtU+tfFYmwkBvgcrK2+QQ8tYFVALVb5fVJykKc5A==
208
+ dependencies:
209
+ "@codemirror/state" "^6.0.0"
210
+ "@codemirror/view" "^6.35.0"
211
+ crelt "^1.0.5"
212
+
213
+ "@codemirror/search@^6.0.0":
214
+ version "6.5.8"
215
+ resolved "https://registry.yarnpkg.com/@codemirror/search/-/search-6.5.8.tgz#b59b3659b46184cc75d6108d7c050a4ca344c3a0"
216
+ integrity sha512-PoWtZvo7c1XFeZWmmyaOp2G0XVbOnm+fJzvghqGAktBW3cufwJUWvSCcNG0ppXiBEM05mZu6RhMtXPv2hpllig==
217
+ dependencies:
218
+ "@codemirror/state" "^6.0.0"
219
+ "@codemirror/view" "^6.0.0"
220
+ crelt "^1.0.5"
221
+
222
+ "@codemirror/state@^6.0.0", "@codemirror/state@^6.4.0":
223
+ version "6.4.1"
224
+ resolved "https://registry.yarnpkg.com/@codemirror/state/-/state-6.4.1.tgz#da57143695c056d9a3c38705ed34136e2b68171b"
225
+ integrity sha512-QkEyUiLhsJoZkbumGZlswmAhA7CBU02Wrz7zvH4SrcifbsqwlXShVXg65f3v/ts57W3dqyamEriMhij1Z3Zz4A==
226
+
227
+ "@codemirror/view@^6.0.0", "@codemirror/view@^6.17.0", "@codemirror/view@^6.23.0", "@codemirror/view@^6.27.0", "@codemirror/view@^6.35.0":
228
+ version "6.35.0"
229
+ resolved "https://registry.yarnpkg.com/@codemirror/view/-/view-6.35.0.tgz#890e8e31a58edf65cdf193049fe9f3fdec20cc82"
230
+ integrity sha512-I0tYy63q5XkaWsJ8QRv5h6ves7kvtrBWjBcnf/bzohFJQc5c14a1AQRdE8QpPF9eMp5Mq2FMm59TCj1gDfE7kw==
231
+ dependencies:
232
+ "@codemirror/state" "^6.4.0"
233
+ style-mod "^4.1.0"
234
+ w3c-keyname "^2.2.4"
235
+
172
236
  "@esbuild/aix-ppc64@0.21.5":
173
237
  version "0.21.5"
174
238
  resolved "https://registry.yarnpkg.com/@esbuild/aix-ppc64/-/aix-ppc64-0.21.5.tgz#c7184a326533fcdf1b8ee0733e21c713b975575f"
@@ -372,6 +436,25 @@
372
436
  "@jridgewell/resolve-uri" "^3.1.0"
373
437
  "@jridgewell/sourcemap-codec" "^1.4.14"
374
438
 
439
+ "@lezer/common@^1.0.0", "@lezer/common@^1.1.0":
440
+ version "1.2.3"
441
+ resolved "https://registry.yarnpkg.com/@lezer/common/-/common-1.2.3.tgz#138fcddab157d83da557554851017c6c1e5667fd"
442
+ integrity sha512-w7ojc8ejBqr2REPsWxJjrMFsA/ysDCFICn8zEOR9mrqzOu2amhITYuLD8ag6XZf0CFXDrhKqw7+tW8cX66NaDA==
443
+
444
+ "@lezer/highlight@^1.0.0":
445
+ version "1.2.1"
446
+ resolved "https://registry.yarnpkg.com/@lezer/highlight/-/highlight-1.2.1.tgz#596fa8f9aeb58a608be0a563e960c373cbf23f8b"
447
+ integrity sha512-Z5duk4RN/3zuVO7Jq0pGLJ3qynpxUVsh7IbUbGj88+uV2ApSAn6kWg2au3iJb+0Zi7kKtqffIESgNcRXWZWmSA==
448
+ dependencies:
449
+ "@lezer/common" "^1.0.0"
450
+
451
+ "@lezer/lr@^1.0.0":
452
+ version "1.4.2"
453
+ resolved "https://registry.yarnpkg.com/@lezer/lr/-/lr-1.4.2.tgz#931ea3dea8e9de84e90781001dae30dea9ff1727"
454
+ integrity sha512-pu0K1jCIdnQ12aWNaAVU5bzi7Bd1w54J3ECgANPmYLtQKP0HBj2cE/5coBD66MT10xbtIuUr7tg0Shbsvk0mDA==
455
+ dependencies:
456
+ "@lezer/common" "^1.0.0"
457
+
375
458
  "@nodelib/fs.scandir@2.1.5":
376
459
  version "2.1.5"
377
460
  resolved "https://registry.yarnpkg.com/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz#7619c2eb21b25483f6d167548b4cfd5a7488c3d5"
@@ -619,6 +702,19 @@ clipboard@^2.0.11:
619
702
  select "^1.1.2"
620
703
  tiny-emitter "^2.0.0"
621
704
 
705
+ codemirror@^6.0.1:
706
+ version "6.0.1"
707
+ resolved "https://registry.yarnpkg.com/codemirror/-/codemirror-6.0.1.tgz#62b91142d45904547ee3e0e0e4c1a79158035a29"
708
+ integrity sha512-J8j+nZ+CdWmIeFIGXEFbFPtpiYacFMDR8GlHK3IyHQJMCaVRfGx9NT+Hxivv1ckLWPvNdZqndbr/7lVhrf/Svg==
709
+ dependencies:
710
+ "@codemirror/autocomplete" "^6.0.0"
711
+ "@codemirror/commands" "^6.0.0"
712
+ "@codemirror/language" "^6.0.0"
713
+ "@codemirror/lint" "^6.0.0"
714
+ "@codemirror/search" "^6.0.0"
715
+ "@codemirror/state" "^6.0.0"
716
+ "@codemirror/view" "^6.0.0"
717
+
622
718
  color-convert@^1.9.0:
623
719
  version "1.9.3"
624
720
  resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-1.9.3.tgz#bb71850690e1f136567de629d2d5471deda4c1e8"
@@ -653,6 +749,11 @@ convert-source-map@^2.0.0:
653
749
  resolved "https://registry.yarnpkg.com/convert-source-map/-/convert-source-map-2.0.0.tgz#4b560f649fc4e918dd0ab75cf4961e8bc882d82a"
654
750
  integrity sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==
655
751
 
752
+ crelt@^1.0.5:
753
+ version "1.0.6"
754
+ resolved "https://registry.yarnpkg.com/crelt/-/crelt-1.0.6.tgz#7cc898ea74e190fb6ef9dae57f8f81cf7302df72"
755
+ integrity sha512-VQ2MBenTq1fWZUH9DJNGti7kKv6EeAuYr3cLwxUWhIu1baTaXh4Ib5W2CqHVqib4/MqbYGJqiL3Zb8GJZr3l4g==
756
+
656
757
  cross-spawn@^7.0.2:
657
758
  version "7.0.3"
658
759
  resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6"
@@ -885,6 +986,11 @@ file-entry-cache@^8.0.0:
885
986
  dependencies:
886
987
  flat-cache "^4.0.0"
887
988
 
989
+ filtrex@^3.1.0:
990
+ version "3.1.0"
991
+ resolved "https://registry.yarnpkg.com/filtrex/-/filtrex-3.1.0.tgz#5ec00994615ff10e5e09c89bb290c855cb408c21"
992
+ integrity sha512-mHzZ2wUISETF1OaEcNRiGz1ljuIV8c/C9td9qyAZ+wTwigkAk5RO9YrCxQKk5H9v7joDRFIBik9U5RTK9eXZ/A==
993
+
888
994
  find-up@^5.0.0:
889
995
  version "5.0.0"
890
996
  resolved "https://registry.yarnpkg.com/find-up/-/find-up-5.0.0.tgz#4c92819ecb7083561e4f4a240a86be5198f536fc"
@@ -1367,6 +1473,11 @@ strip-json-comments@^3.1.1:
1367
1473
  resolved "https://registry.yarnpkg.com/strip-json-comments/-/strip-json-comments-3.1.1.tgz#31f1281b3832630434831c310c01cccda8cbe006"
1368
1474
  integrity sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==
1369
1475
 
1476
+ style-mod@^4.0.0, style-mod@^4.1.0:
1477
+ version "4.1.2"
1478
+ resolved "https://registry.yarnpkg.com/style-mod/-/style-mod-4.1.2.tgz#ca238a1ad4786520f7515a8539d5a63691d7bf67"
1479
+ integrity sha512-wnD1HyVqpJUI2+eKZ+eo1UwghftP6yuFheBqqe+bWCotBjC2K1YnteJILRMs3SM4V/0dLEW1SC27MWP5y+mwmw==
1480
+
1370
1481
  supports-color@^5.3.0:
1371
1482
  version "5.5.0"
1372
1483
  resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-5.5.0.tgz#e2e69a44ac8772f78a1ec0b35b689df6530efc8f"
@@ -1442,6 +1553,11 @@ vite@^5.3.2:
1442
1553
  optionalDependencies:
1443
1554
  fsevents "~2.3.3"
1444
1555
 
1556
+ w3c-keyname@^2.2.4:
1557
+ version "2.2.8"
1558
+ resolved "https://registry.yarnpkg.com/w3c-keyname/-/w3c-keyname-2.2.8.tgz#7b17c8c6883d4e8b86ac8aba79d39e880f8869c5"
1559
+ integrity sha512-dpojBhNsCNN7T82Tm7k26A6G9ML3NkhDsnw9n/eoxSRlVBB4CEtIQ/KTCLI2Fwf3ataSXRhYFkQi3SlnFwPvPQ==
1560
+
1445
1561
  which@^2.0.1:
1446
1562
  version "2.0.2"
1447
1563
  resolved "https://registry.yarnpkg.com/which/-/which-2.0.2.tgz#7c6a8dd0a636a0327e10b59c9286eee93f3f51b1"
File without changes
@@ -40,7 +40,7 @@ class HumanApprovalManager:
40
40
  future = cast(Future[Approval], asyncio.get_event_loop().create_future())
41
41
  sample = sample_active()
42
42
  assert sample
43
- assert sample.sample.id
43
+ assert sample.sample.id is not None
44
44
  pending = PendingApprovalRequest(
45
45
  request=request,
46
46
  task=sample.task,
@@ -1,13 +1,13 @@
1
1
  import fnmatch
2
- import re
2
+ import sys
3
3
  from dataclasses import dataclass
4
4
  from pathlib import Path
5
- from re import Pattern
6
5
  from typing import Any, Generator, cast
7
6
 
8
7
  from pydantic import BaseModel, Field, model_validator
9
8
 
10
9
  from inspect_ai._util.config import read_config_object
10
+ from inspect_ai._util.format import format_function_call
11
11
  from inspect_ai._util.registry import registry_create, registry_lookup
12
12
  from inspect_ai.solver._task_state import TaskState
13
13
  from inspect_ai.tool._tool_call import ToolCall, ToolCallView
@@ -30,17 +30,23 @@ def policy_approver(policies: str | list[ApprovalPolicy]) -> Approver:
30
30
  policies = approval_policies_from_config(policies)
31
31
 
32
32
  # compile policy into approvers and regexes for matching
33
- policy_matchers: list[tuple[list[Pattern[str]], Approver]] = []
33
+ policy_matchers: list[tuple[list[str], Approver]] = []
34
34
  for policy in policies:
35
35
  tools = [policy.tools] if isinstance(policy.tools, str) else policy.tools
36
- patterns = [re.compile(fnmatch.translate(tool)) for tool in tools]
37
- policy_matchers.append((patterns, policy.approver))
36
+ globs = [f"{tool}*" for tool in tools]
37
+ policy_matchers.append((globs, policy.approver))
38
38
 
39
39
  # generator for policies that match a tool_call
40
40
  def tool_approvers(tool_call: ToolCall) -> Generator[Approver, None, None]:
41
41
  for policy_matcher in iter(policy_matchers):
42
+ function_call = format_function_call(
43
+ tool_call.function, tool_call.arguments, width=sys.maxsize
44
+ )
42
45
  if any(
43
- [pattern.match(tool_call.function) for pattern in policy_matcher[0]]
46
+ [
47
+ fnmatch.fnmatch(function_call, pattern)
48
+ for pattern in policy_matcher[0]
49
+ ]
44
50
  ):
45
51
  yield policy_matcher[1]
46
52
 
inspect_ai/log/_log.py CHANGED
@@ -114,7 +114,7 @@ class EvalConfig(BaseModel):
114
114
 
115
115
 
116
116
  class EvalSampleLimit(BaseModel):
117
- type: Literal["context", "time", "message", "token", "operator"]
117
+ type: Literal["context", "time", "message", "token", "operator", "custom"]
118
118
  """The type of limit"""
119
119
 
120
120
  limit: int
@@ -113,6 +113,14 @@ def sample_active() -> ActiveSample | None:
113
113
  return _sample_active.get(None)
114
114
 
115
115
 
116
+ def active_sample_token_limit() -> int | None:
117
+ active = sample_active()
118
+ if active:
119
+ return active.token_limit
120
+ else:
121
+ return None
122
+
123
+
116
124
  def set_active_sample_token_limit(token_limit: int | None) -> None:
117
125
  active = sample_active()
118
126
  if active:
@@ -125,6 +133,14 @@ def set_active_sample_total_tokens(total_tokens: int) -> None:
125
133
  active.total_tokens = total_tokens
126
134
 
127
135
 
136
+ def active_sample_message_limit() -> int | None:
137
+ active = sample_active()
138
+ if active:
139
+ return active.message_limit
140
+ else:
141
+ return None
142
+
143
+
128
144
  def set_active_sample_message_limit(message_limit: int | None) -> None:
129
145
  active = sample_active()
130
146
  if active:
@@ -70,7 +70,7 @@ class SampleLimitEvent(BaseEvent):
70
70
  event: Literal["sample_limit"] = Field(default="sample_limit")
71
71
  """Event type."""
72
72
 
73
- type: Literal["message", "time", "token", "operator"]
73
+ type: Literal["message", "time", "token", "operator", "custom"]
74
74
  """Type of limit that halted processing"""
75
75
 
76
76
  message: str
@@ -124,6 +124,9 @@ class ModelEvent(BaseEvent):
124
124
  output: ModelOutput
125
125
  """Output from model."""
126
126
 
127
+ error: str | None = Field(default=None)
128
+ """Error which occurred during model call."""
129
+
127
130
  cache: Literal["read", "write"] | None = Field(default=None)
128
131
  """Was this a cache read or write."""
129
132
 
@@ -1,5 +1,6 @@
1
1
  import asyncio
2
2
  import inspect
3
+ import json
3
4
  import types
4
5
  from dataclasses import is_dataclass
5
6
  from logging import getLogger
@@ -21,6 +22,7 @@ from typing import (
21
22
  is_typeddict,
22
23
  )
23
24
 
25
+ import yaml
24
26
  from jsonschema import Draft7Validator
25
27
  from pydantic import BaseModel
26
28
 
@@ -328,6 +330,10 @@ def tool_params(input: dict[str, Any], func: Callable[..., Any]) -> dict[str, An
328
330
  type_hints = get_type_hints(func)
329
331
  docstring = inspect.getdoc(func)
330
332
 
333
+ # if the function takes **kwargs: Any then just pass the tool arguments through
334
+ if "kwargs" in type_hints and type_hints["kwargs"] == Any:
335
+ return input
336
+
331
337
  # build params
332
338
  params: dict[str, Any] = {}
333
339
  for param_name, param in signature.parameters.items():
@@ -465,3 +471,56 @@ def truncate_tool_output(
465
471
  )
466
472
  else:
467
473
  return None
474
+
475
+
476
+ def tool_parse_error_message(arguments: str, ex: Exception) -> str:
477
+ return f"Error parsing the following tool call arguments:\n\n{arguments}\n\nError details: {ex}"
478
+
479
+
480
+ def parse_tool_call(
481
+ id: str, function: str, arguments: str, tools: list[ToolInfo] | None = None
482
+ ) -> ToolCall:
483
+ error: str | None = None
484
+ arguments_dict: dict[str, Any] = {}
485
+
486
+ def report_parse_error(ex: Exception) -> None:
487
+ nonlocal error
488
+ error = tool_parse_error_message(arguments, ex)
489
+ logger.info(error)
490
+
491
+ # if the arguments is a dict, then handle it with a plain json.loads
492
+ arguments = arguments.strip()
493
+ if arguments.startswith("{"):
494
+ try:
495
+ arguments_dict = json.loads(arguments)
496
+ except json.JSONDecodeError as ex:
497
+ report_parse_error(ex)
498
+
499
+ # otherwise parse it as yaml (which will pickup unquoted strings, numbers, and true/false)
500
+ # and then create a dict that maps it to the first function argument
501
+ elif function and tools:
502
+ tool_info = next(
503
+ (
504
+ tool
505
+ for tool in tools
506
+ if tool.name == function and len(tool.parameters.properties) > 0
507
+ ),
508
+ None,
509
+ )
510
+ if tool_info:
511
+ param_names = list(tool_info.parameters.properties.keys())
512
+ try:
513
+ value = yaml.safe_load(arguments)
514
+ arguments_dict[param_names[0]] = value
515
+ except yaml.error.YAMLError:
516
+ # If the yaml parser fails, we treat it as a string argument.
517
+ arguments_dict[param_names[0]] = arguments
518
+
519
+ # return ToolCall with error payload
520
+ return ToolCall(
521
+ id=id,
522
+ function=function,
523
+ arguments=arguments_dict,
524
+ type="function",
525
+ parse_error=error,
526
+ )
@@ -15,13 +15,16 @@ MESSAGE_TITLE = "Message"
15
15
  def conversation_tool_mesage(message: ChatMessageTool) -> None:
16
16
  if display_type() == "conversation":
17
17
  # truncate output to 100 lines
18
- output = message.error.message if message.error else message.text.strip()
19
- content = lines_display(output, 100)
20
-
21
- conversation_panel(
22
- title=f"Tool Output: {message.function}",
23
- content=content,
18
+ output = (
19
+ message.error.message.strip() if message.error else message.text.strip()
24
20
  )
21
+ if output:
22
+ content = lines_display(output, 100)
23
+
24
+ conversation_panel(
25
+ title=f"Tool Output: {message.function}",
26
+ content=content,
27
+ )
25
28
 
26
29
 
27
30
  def conversation_assistant_message(
@@ -42,8 +45,14 @@ def conversation_assistant_message(
42
45
 
43
46
  # print tool calls
44
47
  if message.tool_calls:
45
- content.append(Text())
48
+ if content:
49
+ content.append(Text())
46
50
  content.extend(render_tool_calls(message.tool_calls))
47
51
 
48
52
  # print the assistant message
49
53
  conversation_panel(title="Assistant", content=content)
54
+
55
+
56
+ def conversation_assistant_error(error: Exception) -> None:
57
+ if display_type() == "conversation":
58
+ conversation_panel(title="Assistant", content=repr(error))
@@ -34,7 +34,7 @@ class GenerateConfigArgs(TypedDict, total=False):
34
34
  """Sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence."""
35
35
 
36
36
  best_of: int | None
37
- """Generates best_of completions server-side and returns the 'best' (the one with the highest log probability per token). OpenAI only."""
37
+ """Generates best_of completions server-side and returns the 'best' (the one with the highest log probability per token). vLLM only."""
38
38
 
39
39
  frequency_penalty: float | None
40
40
  """Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. OpenAI, Google, Grok, Groq, and vLLM only."""
@@ -48,9 +48,6 @@ class GenerateConfigArgs(TypedDict, total=False):
48
48
  seed: int | None
49
49
  """Random seed. OpenAI, Google, Mistral, Groq, HuggingFace, and vLLM only."""
50
50
 
51
- suffix: str | None
52
- """The suffix that comes after a completion of inserted text. OpenAI only."""
53
-
54
51
  top_k: int | None
55
52
  """Randomly sample the next word from the top_k most likely next words. Anthropic, Google, and HuggingFace only."""
56
53
 
@@ -58,14 +55,17 @@ class GenerateConfigArgs(TypedDict, total=False):
58
55
  """How many chat completion choices to generate for each input message. OpenAI, Grok, Google, and TogetherAI only."""
59
56
 
60
57
  logprobs: bool | None
61
- """Return log probabilities of the output tokens. OpenAI, Google, Grok, TogetherAI, Huggingface, llama-cpp-python, and vLLM only."""
58
+ """Return log probabilities of the output tokens. OpenAI, Grok, TogetherAI, Huggingface, llama-cpp-python, and vLLM only."""
62
59
 
63
60
  top_logprobs: int | None
64
- """Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, Google, Grok, and Huggingface only."""
61
+ """Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, Grok, and Huggingface only."""
65
62
 
66
63
  parallel_tool_calls: bool | None
67
64
  """Whether to enable parallel function calling during tool use (defaults to True). OpenAI and Groq only."""
68
65
 
66
+ internal_tools: bool | None
67
+ """Whether to automatically map tools to model internal implementations (e.g. 'computer' for anthropic)."""
68
+
69
69
  max_tool_output: int | None
70
70
  """Maximum tool output (in bytes). Defaults to 16 * 1024."""
71
71
 
@@ -104,7 +104,7 @@ class GenerateConfig(BaseModel):
104
104
  """Sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence."""
105
105
 
106
106
  best_of: int | None = Field(default=None)
107
- """Generates best_of completions server-side and returns the 'best' (the one with the highest log probability per token). OpenAI and vLLM only."""
107
+ """Generates best_of completions server-side and returns the 'best' (the one with the highest log probability per token). vLLM only."""
108
108
 
109
109
  frequency_penalty: float | None = Field(default=None)
110
110
  """Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. OpenAI, Google, Grok, Groq, and vLLM only."""
@@ -118,9 +118,6 @@ class GenerateConfig(BaseModel):
118
118
  seed: int | None = Field(default=None)
119
119
  """Random seed. OpenAI, Google, Mistral, Groq, HuggingFace, and vLLM only."""
120
120
 
121
- suffix: str | None = Field(default=None)
122
- """The suffix that comes after a completion of inserted text. OpenAI only."""
123
-
124
121
  top_k: int | None = Field(default=None)
125
122
  """Randomly sample the next word from the top_k most likely next words. Anthropic, Google, HuggingFace, and vLLM only."""
126
123
 
@@ -128,14 +125,17 @@ class GenerateConfig(BaseModel):
128
125
  """How many chat completion choices to generate for each input message. OpenAI, Grok, Google, TogetherAI, and vLLM only."""
129
126
 
130
127
  logprobs: bool | None = Field(default=None)
131
- """Return log probabilities of the output tokens. OpenAI, Google, Grok, TogetherAI, Huggingface, llama-cpp-python, and vLLM only."""
128
+ """Return log probabilities of the output tokens. OpenAI, Grok, TogetherAI, Huggingface, llama-cpp-python, and vLLM only."""
132
129
 
133
130
  top_logprobs: int | None = Field(default=None)
134
- """Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, Google, Grok, Huggingface, and vLLM only."""
131
+ """Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, Grok, Huggingface, and vLLM only."""
135
132
 
136
133
  parallel_tool_calls: bool | None = Field(default=None)
137
134
  """Whether to enable parallel function calling during tool use (defaults to True). OpenAI and Groq only."""
138
135
 
136
+ internal_tools: bool | None = Field(default=None)
137
+ """Whether to automatically map tools to model internal implementations (e.g. 'computer' for anthropic)."""
138
+
139
139
  max_tool_output: int | None = Field(default=None)
140
140
  """Maximum tool output (in bytes). Defaults to 16 * 1024."""
141
141