inspect-ai 0.3.81__py3-none-any.whl → 0.3.83__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (297) hide show
  1. inspect_ai/__init__.py +2 -1
  2. inspect_ai/_cli/eval.py +35 -2
  3. inspect_ai/_cli/util.py +44 -1
  4. inspect_ai/_display/core/config.py +1 -1
  5. inspect_ai/_display/core/display.py +13 -4
  6. inspect_ai/_display/core/results.py +1 -1
  7. inspect_ai/_display/textual/app.py +14 -3
  8. inspect_ai/_display/textual/display.py +4 -0
  9. inspect_ai/_display/textual/widgets/samples.py +9 -3
  10. inspect_ai/_display/textual/widgets/task_detail.py +8 -8
  11. inspect_ai/_display/textual/widgets/tasks.py +17 -1
  12. inspect_ai/_display/textual/widgets/vscode.py +44 -0
  13. inspect_ai/_eval/eval.py +74 -25
  14. inspect_ai/_eval/evalset.py +22 -18
  15. inspect_ai/_eval/loader.py +34 -11
  16. inspect_ai/_eval/run.py +13 -15
  17. inspect_ai/_eval/score.py +13 -3
  18. inspect_ai/_eval/task/generate.py +8 -9
  19. inspect_ai/_eval/task/log.py +55 -6
  20. inspect_ai/_eval/task/run.py +51 -10
  21. inspect_ai/_eval/task/task.py +23 -9
  22. inspect_ai/_util/constants.py +2 -0
  23. inspect_ai/_util/file.py +30 -1
  24. inspect_ai/_util/json.py +37 -1
  25. inspect_ai/_util/registry.py +1 -0
  26. inspect_ai/_util/vscode.py +37 -0
  27. inspect_ai/_view/server.py +113 -1
  28. inspect_ai/_view/www/App.css +7 -1
  29. inspect_ai/_view/www/dist/assets/index.css +813 -415
  30. inspect_ai/_view/www/dist/assets/index.js +54475 -32003
  31. inspect_ai/_view/www/eslint.config.mjs +1 -1
  32. inspect_ai/_view/www/log-schema.json +137 -31
  33. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  34. inspect_ai/_view/www/package.json +11 -2
  35. inspect_ai/_view/www/src/App.tsx +161 -853
  36. inspect_ai/_view/www/src/api/api-browser.ts +176 -5
  37. inspect_ai/_view/www/src/api/api-vscode.ts +75 -1
  38. inspect_ai/_view/www/src/api/client-api.ts +66 -10
  39. inspect_ai/_view/www/src/api/jsonrpc.ts +2 -0
  40. inspect_ai/_view/www/src/api/types.ts +107 -2
  41. inspect_ai/_view/www/src/appearance/icons.ts +2 -0
  42. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +3 -3
  43. inspect_ai/_view/www/src/components/Card.tsx +6 -4
  44. inspect_ai/_view/www/src/components/DownloadPanel.tsx +2 -2
  45. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +56 -61
  46. inspect_ai/_view/www/src/components/FindBand.tsx +17 -9
  47. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +1 -1
  48. inspect_ai/_view/www/src/components/JsonPanel.tsx +14 -24
  49. inspect_ai/_view/www/src/components/LargeModal.tsx +2 -35
  50. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +27 -11
  51. inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
  52. inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
  53. inspect_ai/_view/www/src/components/LiveVirtualList.module.css +11 -0
  54. inspect_ai/_view/www/src/components/LiveVirtualList.tsx +177 -0
  55. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +116 -26
  56. inspect_ai/_view/www/src/components/MessageBand.tsx +14 -9
  57. inspect_ai/_view/www/src/components/Modal.module.css +38 -0
  58. inspect_ai/_view/www/src/components/Modal.tsx +77 -0
  59. inspect_ai/_view/www/src/components/MorePopOver.tsx +3 -3
  60. inspect_ai/_view/www/src/components/NavPills.tsx +20 -8
  61. inspect_ai/_view/www/src/components/NoContentsPanel.module.css +12 -0
  62. inspect_ai/_view/www/src/components/NoContentsPanel.tsx +20 -0
  63. inspect_ai/_view/www/src/components/ProgressBar.module.css +5 -4
  64. inspect_ai/_view/www/src/components/ProgressBar.tsx +3 -2
  65. inspect_ai/_view/www/src/components/PulsingDots.module.css +81 -0
  66. inspect_ai/_view/www/src/components/PulsingDots.tsx +45 -0
  67. inspect_ai/_view/www/src/components/TabSet.tsx +4 -37
  68. inspect_ai/_view/www/src/components/ToolButton.tsx +3 -4
  69. inspect_ai/_view/www/src/index.tsx +26 -94
  70. inspect_ai/_view/www/src/logfile/remoteLogFile.ts +9 -1
  71. inspect_ai/_view/www/src/logfile/remoteZipFile.ts +30 -4
  72. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +4 -6
  73. inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
  74. inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
  75. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +1 -1
  76. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
  77. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +9 -1
  78. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +74 -28
  79. inspect_ai/_view/www/src/samples/SampleDialog.tsx +58 -22
  80. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +4 -0
  81. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +135 -104
  82. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +10 -0
  83. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +83 -36
  84. inspect_ai/_view/www/src/samples/SamplesTools.tsx +35 -30
  85. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -1
  86. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +1 -1
  87. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +45 -53
  88. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +6 -1
  89. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +5 -0
  90. inspect_ai/_view/www/src/samples/chat/messages.ts +36 -0
  91. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +3 -0
  92. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +11 -1
  93. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +22 -46
  94. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +34 -20
  95. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
  96. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
  97. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
  98. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -10
  99. inspect_ai/_view/www/src/samples/descriptor/types.ts +6 -5
  100. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +22 -3
  101. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +27 -2
  102. inspect_ai/_view/www/src/samples/list/SampleList.tsx +122 -85
  103. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +6 -0
  104. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +28 -15
  105. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +29 -18
  106. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +28 -28
  107. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +19 -9
  108. inspect_ai/_view/www/src/samples/sampleDataAdapter.ts +33 -0
  109. inspect_ai/_view/www/src/samples/sampleLimit.ts +2 -2
  110. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +12 -27
  111. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
  112. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
  113. inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
  114. inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
  115. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +0 -13
  116. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +0 -13
  117. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +0 -13
  118. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +4 -0
  119. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +10 -24
  120. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +0 -13
  121. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -22
  122. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +15 -24
  123. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +0 -13
  124. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +6 -28
  125. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +24 -34
  126. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +4 -0
  127. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +33 -17
  128. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +197 -338
  129. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.module.css +16 -0
  130. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.tsx +44 -0
  131. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +7 -4
  132. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +81 -60
  133. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.module.css +23 -0
  134. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.tsx +27 -0
  135. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +29 -1
  136. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +102 -72
  137. inspect_ai/_view/www/src/scoring/utils.ts +87 -0
  138. inspect_ai/_view/www/src/state/appSlice.ts +244 -0
  139. inspect_ai/_view/www/src/state/hooks.ts +399 -0
  140. inspect_ai/_view/www/src/state/logPolling.ts +200 -0
  141. inspect_ai/_view/www/src/state/logSlice.ts +224 -0
  142. inspect_ai/_view/www/src/state/logsPolling.ts +118 -0
  143. inspect_ai/_view/www/src/state/logsSlice.ts +181 -0
  144. inspect_ai/_view/www/src/state/samplePolling.ts +314 -0
  145. inspect_ai/_view/www/src/state/sampleSlice.ts +140 -0
  146. inspect_ai/_view/www/src/state/sampleUtils.ts +21 -0
  147. inspect_ai/_view/www/src/state/scrolling.ts +206 -0
  148. inspect_ai/_view/www/src/state/store.ts +168 -0
  149. inspect_ai/_view/www/src/state/store_filter.ts +84 -0
  150. inspect_ai/_view/www/src/state/utils.ts +23 -0
  151. inspect_ai/_view/www/src/storage/index.ts +26 -0
  152. inspect_ai/_view/www/src/types/log.d.ts +36 -26
  153. inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
  154. inspect_ai/_view/www/src/types.ts +94 -32
  155. inspect_ai/_view/www/src/utils/attachments.ts +58 -23
  156. inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
  157. inspect_ai/_view/www/src/utils/logger.ts +52 -0
  158. inspect_ai/_view/www/src/utils/polling.ts +100 -0
  159. inspect_ai/_view/www/src/utils/react.ts +30 -0
  160. inspect_ai/_view/www/src/utils/vscode.ts +1 -1
  161. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +184 -217
  162. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +11 -53
  163. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +8 -18
  164. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -0
  165. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +40 -22
  166. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -1
  167. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +159 -103
  168. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.module.css +32 -0
  169. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.tsx +32 -0
  170. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
  171. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
  172. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +12 -14
  173. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +6 -2
  174. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +4 -4
  175. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
  176. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +28 -13
  177. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +5 -10
  178. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +4 -4
  179. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.module.css +22 -0
  180. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.tsx +19 -0
  181. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +128 -115
  182. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +37 -5
  183. inspect_ai/_view/www/src/workspace/tabs/types.ts +4 -0
  184. inspect_ai/_view/www/src/workspace/types.ts +4 -3
  185. inspect_ai/_view/www/src/workspace/utils.ts +4 -4
  186. inspect_ai/_view/www/vite.config.js +6 -0
  187. inspect_ai/_view/www/yarn.lock +464 -355
  188. inspect_ai/agent/__init__.py +36 -0
  189. inspect_ai/agent/_agent.py +268 -0
  190. inspect_ai/agent/_as_solver.py +72 -0
  191. inspect_ai/agent/_as_tool.py +122 -0
  192. inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
  193. inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
  194. inspect_ai/agent/_filter.py +46 -0
  195. inspect_ai/agent/_handoff.py +93 -0
  196. inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
  197. inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
  198. inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
  199. inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
  200. inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
  201. inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
  202. inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
  203. inspect_ai/agent/_react.py +241 -0
  204. inspect_ai/agent/_run.py +36 -0
  205. inspect_ai/agent/_types.py +81 -0
  206. inspect_ai/log/_condense.py +26 -0
  207. inspect_ai/log/_log.py +17 -5
  208. inspect_ai/log/_recorders/buffer/__init__.py +14 -0
  209. inspect_ai/log/_recorders/buffer/buffer.py +30 -0
  210. inspect_ai/log/_recorders/buffer/database.py +685 -0
  211. inspect_ai/log/_recorders/buffer/filestore.py +259 -0
  212. inspect_ai/log/_recorders/buffer/types.py +84 -0
  213. inspect_ai/log/_recorders/eval.py +2 -11
  214. inspect_ai/log/_recorders/types.py +30 -0
  215. inspect_ai/log/_transcript.py +32 -2
  216. inspect_ai/model/__init__.py +7 -1
  217. inspect_ai/model/_call_tools.py +257 -52
  218. inspect_ai/model/_chat_message.py +7 -4
  219. inspect_ai/model/_conversation.py +13 -62
  220. inspect_ai/model/_display.py +85 -0
  221. inspect_ai/model/_generate_config.py +2 -2
  222. inspect_ai/model/_model.py +114 -14
  223. inspect_ai/model/_model_output.py +14 -9
  224. inspect_ai/model/_openai.py +16 -4
  225. inspect_ai/model/_openai_computer_use.py +162 -0
  226. inspect_ai/model/_openai_responses.py +319 -165
  227. inspect_ai/model/_providers/anthropic.py +20 -21
  228. inspect_ai/model/_providers/azureai.py +24 -13
  229. inspect_ai/model/_providers/bedrock.py +1 -7
  230. inspect_ai/model/_providers/cloudflare.py +3 -3
  231. inspect_ai/model/_providers/goodfire.py +2 -6
  232. inspect_ai/model/_providers/google.py +11 -10
  233. inspect_ai/model/_providers/groq.py +6 -3
  234. inspect_ai/model/_providers/hf.py +7 -3
  235. inspect_ai/model/_providers/mistral.py +7 -10
  236. inspect_ai/model/_providers/openai.py +47 -17
  237. inspect_ai/model/_providers/openai_o1.py +11 -4
  238. inspect_ai/model/_providers/openai_responses.py +12 -14
  239. inspect_ai/model/_providers/providers.py +2 -2
  240. inspect_ai/model/_providers/together.py +12 -2
  241. inspect_ai/model/_providers/util/chatapi.py +7 -2
  242. inspect_ai/model/_providers/util/hf_handler.py +4 -2
  243. inspect_ai/model/_providers/util/llama31.py +4 -2
  244. inspect_ai/model/_providers/vertex.py +11 -9
  245. inspect_ai/model/_providers/vllm.py +4 -4
  246. inspect_ai/scorer/__init__.py +2 -0
  247. inspect_ai/scorer/_metrics/__init__.py +2 -0
  248. inspect_ai/scorer/_metrics/grouped.py +84 -0
  249. inspect_ai/scorer/_score.py +26 -6
  250. inspect_ai/solver/__init__.py +2 -2
  251. inspect_ai/solver/_basic_agent.py +22 -9
  252. inspect_ai/solver/_bridge.py +31 -0
  253. inspect_ai/solver/_chain.py +20 -12
  254. inspect_ai/solver/_fork.py +5 -1
  255. inspect_ai/solver/_human_agent.py +52 -0
  256. inspect_ai/solver/_prompt.py +3 -1
  257. inspect_ai/solver/_run.py +59 -0
  258. inspect_ai/solver/_solver.py +14 -4
  259. inspect_ai/solver/_task_state.py +5 -3
  260. inspect_ai/tool/_tool_call.py +15 -8
  261. inspect_ai/tool/_tool_def.py +17 -12
  262. inspect_ai/tool/_tool_support_helpers.py +4 -4
  263. inspect_ai/tool/_tool_with.py +14 -11
  264. inspect_ai/tool/_tools/_bash_session.py +11 -2
  265. inspect_ai/tool/_tools/_computer/_common.py +18 -2
  266. inspect_ai/tool/_tools/_computer/_computer.py +18 -2
  267. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
  268. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
  269. inspect_ai/tool/_tools/_think.py +1 -1
  270. inspect_ai/tool/_tools/_web_browser/_web_browser.py +103 -62
  271. inspect_ai/util/__init__.py +2 -0
  272. inspect_ai/util/_anyio.py +27 -0
  273. inspect_ai/util/_sandbox/__init__.py +2 -1
  274. inspect_ai/util/_sandbox/context.py +32 -7
  275. inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
  276. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  277. inspect_ai/util/_sandbox/docker/docker.py +12 -1
  278. inspect_ai/util/_store_model.py +30 -7
  279. inspect_ai/util/_subprocess.py +13 -3
  280. inspect_ai/util/_subtask.py +1 -0
  281. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/METADATA +1 -1
  282. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/RECORD +295 -229
  283. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -169
  284. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +0 -22
  285. /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
  286. /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
  287. /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
  288. /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
  289. /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
  290. /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
  291. /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
  292. /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
  293. /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
  294. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/WHEEL +0 -0
  295. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/entry_points.txt +0 -0
  296. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/licenses/LICENSE +0 -0
  297. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,259 @@
1
+ import os
2
+ import tempfile
3
+ from logging import getLogger
4
+ from pathlib import Path
5
+ from typing import Literal
6
+ from zipfile import ZIP_DEFLATED, ZipFile
7
+
8
+ from pydantic import BaseModel, Field
9
+ from typing_extensions import override
10
+
11
+ from inspect_ai._display.core.display import TaskDisplayMetric
12
+ from inspect_ai._util.constants import DEFAULT_LOG_SHARED, EVAL_LOG_FORMAT
13
+ from inspect_ai._util.file import FileSystem, basename, dirname, file, filesystem
14
+ from inspect_ai._util.json import to_json_safe, to_json_str_safe
15
+ from inspect_ai.log._file import read_eval_log
16
+
17
+ from ..types import SampleSummary
18
+ from .types import SampleBuffer, SampleData, Samples
19
+
20
+ logger = getLogger(__name__)
21
+
22
+
23
# Manifest entry describing one stored segment of buffered sample data.
class Segment(BaseModel):
    # segment identifier (used to build the segment's file name and matched
    # against the ids listed in `SampleManifest.segments`)
    id: int
    # compared against a caller's `after_event_id` to decide whether this
    # segment holds events the caller has not seen yet
    last_event_id: int
    # compared against a caller's `after_attachment_id` in the same way
    last_attachment_id: int
27
+
28
+
29
# One per-sample entry written into a segment archive.
class SegmentFile(BaseModel):
    # sample id (together with `epoch` determines the entry's file name)
    id: str | int
    # sample epoch
    epoch: int
    # events and attachments serialized as the entry's JSON content
    data: SampleData
33
+
34
+
35
# Manifest entry for a single sample.
class SampleManifest(BaseModel):
    # summary returned to clients that list samples
    summary: SampleSummary
    # ids of the segments that contain data for this sample
    segments: list[int] = Field(default_factory=list)
38
+
39
+
40
# Top-level index of a sample buffer (serialized to the manifest file).
class Manifest(BaseModel):
    # metrics passed through to `Samples.metrics`
    metrics: list[TaskDisplayMetric] = Field(default_factory=list)
    # one entry per sample
    samples: list[SampleManifest] = Field(default_factory=list)
    # all segments known to the buffer
    segments: list[Segment] = Field(default_factory=list)
44
+
45
+
46
# file name of the manifest stored inside each buffer directory
MANIFEST = "manifest.json"
47
+
48
+
49
class SampleBufferFilestore(SampleBuffer):
    """Sample buffer stored as a manifest plus zipped segments on a filesystem.

    The buffer for a log lives in `<log_dir>/.buffer/<log name without
    extension>/` and contains a `manifest.json` index along with one
    `segment.<id>.zip` archive per segment.
    """

    def __init__(
        self,
        location: str,
        *,
        create: bool = True,
        update_interval: int = DEFAULT_LOG_SHARED,
    ) -> None:
        """Bind the buffer to the log file at `location`.

        Args:
            location: Path or URL of the log file the buffer belongs to.
            create: Create the buffer directory if True.
            update_interval: Value reported as `Samples.refresh`.
        """
        self._fs = filesystem(location)
        self._dir = f"{sample_buffer_dir(dirname(location), self._fs)}{self._fs.sep}{os.path.splitext(basename(location))[0]}{self._fs.sep}"
        self.update_interval = update_interval

        if create:
            self._fs.mkdir(self._dir, exist_ok=True)

            # place a file in the dir to force it to be created
            self._fs.touch(f"{self._dir}.keep")

    def write_manifest(self, manifest: Manifest) -> None:
        """Serialize `manifest` to JSON and write it to the manifest file."""
        with file(self._manifest_file(), "wb") as f:
            f.write(to_json_safe(manifest))

    def write_segment(self, id: int, files: list[SegmentFile]) -> None:
        """Write the segment archive for segment `id`.

        The archive is first built in a local temporary file and then copied
        to the buffer directory with a single write. The temporary file is
        removed whether or not building or copying succeeds.

        Args:
            id: Segment id (determines the archive file name).
            files: Per-sample entries to store in the archive.
        """
        # delete=False because the file is re-opened by name for the copy;
        # we take responsibility for removing it in the `finally` below
        segment_file = tempfile.NamedTemporaryFile(mode="wb", delete=False)
        name = segment_file.name
        try:
            # build the archive locally
            with segment_file:
                with ZipFile(
                    segment_file, mode="w", compression=ZIP_DEFLATED, compresslevel=5
                ) as archive:
                    for sf in files:
                        archive.writestr(
                            segment_file_name(sf.id, sf.epoch),
                            to_json_str_safe(sf.data),
                        )
                segment_file.flush()
                os.fsync(segment_file.fileno())

            # copy the finished archive to its destination in one write
            with open(name, "rb") as zf:
                with file(f"{self._dir}{segment_name(id)}", "wb") as f:
                    f.write(zf.read())
                    f.flush()
        finally:
            # always remove the temporary file (also on zip/flush failures)
            os.unlink(name)

    def read_manifest(self) -> Manifest | None:
        """Read and validate the manifest, or return None if it is missing."""
        try:
            with file(self._manifest_file(), "r") as f:
                contents = f.read()
                return Manifest.model_validate_json(contents)
        except FileNotFoundError:
            return None

    def read_segment_data(
        self, id: int, sample_id: str | int, epoch_id: int
    ) -> SampleData:
        """Read one sample's data out of the archive for segment `id`.

        Args:
            id: Segment id.
            sample_id: Sample id of the entry to read.
            epoch_id: Epoch of the entry to read.

        Returns:
            The `SampleData` parsed from the matching archive entry.
        """
        segment_file = f"{self._dir}{segment_name(id)}"
        with file(segment_file, "rb") as f:
            with ZipFile(f, mode="r") as archive:
                with archive.open(segment_file_name(sample_id, epoch_id), "r") as sf:
                    return SampleData.model_validate_json(sf.read())

    def cleanup(self) -> None:
        """Remove this buffer's directory (errors are logged, not raised)."""
        cleanup_sample_buffer_filestore(self._dir, self._fs)

    @classmethod
    @override
    def running_tasks(cls, log_dir: str) -> list[str] | None:
        """List log file names that have a buffer directory under `log_dir`.

        Returns:
            One `<name>.<EVAL_LOG_FORMAT>` entry per sub-directory of the
            buffer dir, or None if the buffer dir does not exist.
        """
        buffer_dir = Path(sample_buffer_dir(log_dir))
        if buffer_dir.exists():
            return [
                f"{basename(path.name)}.{EVAL_LOG_FORMAT}"
                for path in buffer_dir.iterdir()
                if path.is_dir()
            ]
        else:
            return None

    @override
    def get_samples(
        self, etag: str | None = None
    ) -> Samples | Literal["NotModified"] | None:
        """Get sample summaries and metrics from the manifest.

        Args:
            etag: Etag from a previous call; if it still matches the
                manifest file then "NotModified" is returned.

        Returns:
            `Samples`, "NotModified", or None if there is no manifest.
        """
        # get the etag on the filestore (fall back to mtime + size when the
        # filesystem does not report one)
        try:
            info = self._fs.info(self._manifest_file())
            fs_etag = info.etag or f"{info.mtime}{info.size}"
        except FileNotFoundError:
            return None

        # if the etag matches then return not modified
        if etag == fs_etag:
            return "NotModified"

        # read the manifest
        manifest = self.read_manifest()
        if manifest is None:
            return None

        # provide samples + etag from the manifest
        return Samples(
            samples=[sm.summary for sm in manifest.samples],
            metrics=manifest.metrics,
            refresh=self.update_interval,
            etag=fs_etag,
        )

    @override
    def get_sample_data(
        self,
        id: str | int,
        epoch: int,
        after_event_id: int | None = None,
        after_attachment_id: int | None = None,
    ) -> SampleData | None:
        """Get event and attachment data for a sample.

        Data is returned at segment granularity: every segment of the sample
        whose last event or attachment id exceeds the given thresholds is
        returned in full.

        Args:
            id: Sample id.
            epoch: Sample epoch.
            after_event_id: Optional. Skip segments whose last event id is
                not greater than this.
            after_attachment_id: Optional. Skip segments whose last
                attachment id is not greater than this.

        Returns:
            `SampleData`, or None if the manifest or the sample is missing.
        """
        # read the manifest
        manifest = self.read_manifest()
        if manifest is None:
            return None

        # find this sample in the manifest
        sample = next(
            (
                sample
                for sample in manifest.samples
                if sample.summary.id == id and sample.summary.epoch == epoch
            ),
            None,
        )
        if sample is None:
            return None

        # determine which segments we need to return in order to
        # satisfy the after_event_id and after_attachment_id
        # (note: `or` maps both None and 0 to -1)
        after_event_id = after_event_id or -1
        after_attachment_id = after_attachment_id or -1
        segments = [
            segment
            for segment in manifest.segments
            if segment.id in sample.segments
            and (
                segment.last_event_id > after_event_id
                or segment.last_attachment_id > after_attachment_id
            )
        ]

        # collect data from the segments
        sample_data = SampleData(events=[], attachments=[])
        for segment in segments:
            data = self.read_segment_data(segment.id, id, epoch)
            sample_data.events.extend(data.events)
            sample_data.attachments.extend(data.attachments)

        return sample_data

    def _manifest_file(self) -> str:
        """Return the full path of the manifest file."""
        return f"{self._dir}{MANIFEST}"
206
+
207
+
208
def cleanup_sample_buffer_filestores(log_dir: str) -> None:
    """Remove buffer directories under `log_dir` that are no longer needed.

    A buffer directory is removed when its log file is missing or the log's
    status is anything other than "started". The enclosing `.buffer`
    directory is removed as well once it is empty.
    """
    fs = filesystem(log_dir)
    buffer_dir = sample_buffer_dir(log_dir, fs)

    # enumerate buffer dirs (nothing to do if there is no buffer_dir)
    try:
        entries = fs.ls(buffer_dir)
    except FileNotFoundError:
        return
    log_buffers = [entry for entry in entries if entry.type == "directory"]

    # a buffer is stale when its .eval file is missing or no longer running
    for log_buffer in log_buffers:
        log_file = f"{log_dir}{fs.sep}{basename(log_buffer.name)}.{EVAL_LOG_FORMAT}"
        try:
            stale = read_eval_log(log_file, header_only=True).status != "started"
        except FileNotFoundError:
            stale = True
        if stale:
            cleanup_sample_buffer_filestore(log_buffer.name, fs)

    # drop the .buffer dir itself once nothing is left in it
    try:
        if not fs.ls(buffer_dir):
            fs.rm(buffer_dir, recursive=True)
    except FileNotFoundError:
        pass
237
+
238
+
239
def cleanup_sample_buffer_filestore(buffer_dir: str, fs: FileSystem) -> None:
    """Recursively delete `buffer_dir` via `fs`, logging instead of raising.

    Cleanup is best-effort: any exception from the removal is reported as a
    warning and otherwise ignored.
    """
    try:
        fs.rm(buffer_dir, recursive=True)
    except Exception as err:
        message = f"Error cleaning up sample buffer database at {buffer_dir}: {err}"
        logger.warning(message)
246
+
247
+
248
def segment_name(id: int) -> str:
    """Return the archive file name for segment `id`."""
    return "segment.{}.zip".format(id)
250
+
251
+
252
+ def segment_file_name(id: str | int, epoch: int) -> str:
253
+ return f"{id}_{epoch}.json"
254
+
255
+
256
def sample_buffer_dir(log_dir: str, fs: FileSystem | None = None) -> str:
    """Return the path of the `.buffer` directory inside `log_dir`.

    Trailing slashes and backslashes are stripped from `log_dir` first. When
    `fs` is not given, the filesystem is resolved from the stripped path.
    """
    trimmed = log_dir.rstrip("/\\")
    if not fs:
        fs = filesystem(trimmed)
    return "{}{}.buffer".format(trimmed, fs.sep)
@@ -0,0 +1,84 @@
1
+ import abc
2
+ from typing import Literal, TypeAlias
3
+
4
+ from pydantic import BaseModel, JsonValue
5
+
6
+ from inspect_ai._display.core.display import TaskDisplayMetric
7
+
8
+ from ..types import SampleSummary
9
+
10
# a JSON object: string keys mapped to arbitrary JSON values
JsonData: TypeAlias = dict[str, JsonValue]
11
+
12
+
13
# Result of `SampleBuffer.get_samples()`.
class Samples(BaseModel):
    # summaries of the buffered samples
    samples: list[SampleSummary]
    # task display metrics
    metrics: list[TaskDisplayMetric]
    # refresh interval (units not visible here; presumably seconds — TODO confirm)
    refresh: int
    # etag that can be passed back to `get_samples()` to check for changes
    etag: str
18
+
19
+
20
# A single buffered event belonging to a sample.
class EventData(BaseModel):
    # integer id (presumably the id that `after_event_id` is compared
    # with — TODO confirm against the buffer implementation)
    id: int
    # string identifier of the event
    event_id: str
    # id and epoch of the sample the event belongs to
    sample_id: str
    epoch: int
    # the event payload as a JSON object
    event: JsonData
26
+
27
+
28
# A single buffered attachment belonging to a sample.
class AttachmentData(BaseModel):
    # integer id (presumably the id that `after_attachment_id` is compared
    # with — TODO confirm against the buffer implementation)
    id: int
    # id and epoch of the sample the attachment belongs to
    sample_id: str
    epoch: int
    # hash identifying the attachment (presumably of `content` — TODO confirm)
    hash: str
    # attachment content
    content: str
34
+
35
+
36
# Events and attachments for a sample (result of `SampleBuffer.get_sample_data()`).
class SampleData(BaseModel):
    events: list[EventData]
    attachments: list[AttachmentData]
39
+
40
+
41
class SampleBuffer(abc.ABC):
    """Abstract interface for reading buffered sample data."""

    @classmethod
    @abc.abstractmethod
    def running_tasks(cls, log_dir: str) -> list[str] | None:
        """Get the tasks that have buffered data under a log directory.

        Args:
            log_dir: Log directory to inspect.

        Returns:
            - List of task names (the exact naming is defined by the
              implementation).
            - None if there is nothing to enumerate.
        """
        ...

    @abc.abstractmethod
    def get_samples(
        self, etag: str | None = None
    ) -> Samples | Literal["NotModified"] | None:
        """Get the manifest of all running samples.

        Args:
            etag: Optional etag (returned in `Samples`) for checking
                whether there are any changes in the database.

        Returns:
            - `Samples` if the database exists and has updates
            - "NotModified" if the database exists and has no updates.
            - None if the database no longer exists

        """
        ...

    @abc.abstractmethod
    def get_sample_data(
        self,
        id: str | int,
        epoch: int,
        after_event_id: int | None = None,
        after_attachment_id: int | None = None,
    ) -> SampleData | None:
        """Get event and attachment data for a sample.

        Args:
            id: Sample id
            epoch: Sample epoch
            after_event_id: Optional. Fetch only event data greater than this id.
            after_attachment_id: Optional. Fetch only attachment data greater than this id.

        Returns:
            - `SampleData` with event and attachment data.
            - None if the database no longer exists
        """
        ...
@@ -23,7 +23,6 @@ from inspect_ai._util.file import FileSystem, dirname, file, filesystem
23
23
  from inspect_ai._util.json import jsonable_python
24
24
  from inspect_ai._util.trace import trace_action
25
25
  from inspect_ai.model._chat_message import ChatMessage
26
- from inspect_ai.scorer._metric import Score
27
26
 
28
27
  from .._log import (
29
28
  EvalLog,
@@ -36,20 +35,11 @@ from .._log import (
36
35
  sort_samples,
37
36
  )
38
37
  from .file import FileRecorder
38
+ from .types import SampleSummary
39
39
 
40
40
  logger = getLogger(__name__)
41
41
 
42
42
 
43
- class SampleSummary(BaseModel):
44
- id: int | str
45
- epoch: int
46
- input: str | list[ChatMessage]
47
- target: str | list[str]
48
- scores: dict[str, Score] | None = Field(default=None)
49
- error: str | None = Field(default=None)
50
- limit: str | None = Field(default=None)
51
-
52
-
53
43
  class LogStart(BaseModel):
54
44
  version: int
55
45
  eval: EvalSpec
@@ -331,6 +321,7 @@ class ZipLogFile:
331
321
  epoch=sample.epoch,
332
322
  input=text_inputs(sample.input),
333
323
  target=sample.target,
324
+ completed=True,
334
325
  scores=sample.scores,
335
326
  error=sample.error.message
336
327
  if sample.error is not None
@@ -0,0 +1,30 @@
1
+ from pydantic import BaseModel, Field, model_validator
2
+
3
+ from inspect_ai.log._transcript import Event
4
+ from inspect_ai.model._chat_message import ChatMessage
5
+ from inspect_ai.scorer._metric import Score
6
+
7
+
8
# An event paired with the id and epoch of the sample it belongs to.
class SampleEvent(BaseModel):
    # sample id
    id: str | int
    # sample epoch
    epoch: int
    # the transcript event
    event: Event
12
+
13
+
14
class SampleSummary(BaseModel):
    # identity of the sample
    id: int | str
    epoch: int

    # sample input and target
    input: str | list[ChatMessage]
    target: str | list[str]

    # False unless explicitly set by the producer of the summary
    completed: bool = Field(default=False)

    # optional results
    scores: dict[str, Score] | None = Field(default=None)
    error: str | None = Field(default=None)
    limit: str | None = Field(default=None)

    @model_validator(mode="after")
    def thin_scores(self) -> "SampleSummary":
        # summaries keep only each score's value; every other Score field
        # is reset to its default
        if self.scores is None:
            return self
        self.scores = {
            name: Score(value=full.value) for name, full in self.scores.items()
        }
        return self
@@ -15,6 +15,7 @@ from typing import (
15
15
  )
16
16
 
17
17
  from pydantic import BaseModel, ConfigDict, Field, JsonValue, field_serializer
18
+ from shortuuid import uuid
18
19
 
19
20
  from inspect_ai._util.constants import SAMPLE_SUBTASK
20
21
  from inspect_ai._util.error import EvalError
@@ -43,6 +44,13 @@ logger = getLogger(__name__)
43
44
 
44
45
 
45
46
  class BaseEvent(BaseModel):
47
+ model_config = {
48
+ "json_schema_extra": lambda schema: schema.get("properties", {}).pop(
49
+ "id_", None
50
+ )
51
+ }
52
+ id_: str = Field(default_factory=lambda: str(uuid()), exclude=True)
53
+
46
54
  timestamp: datetime = Field(default_factory=datetime.now)
47
55
  """Clock time at which event occurred."""
48
56
 
@@ -170,8 +178,8 @@ class ToolEvent(BaseEvent):
170
178
  arguments: dict[str, JsonValue]
171
179
  """Arguments to function."""
172
180
 
173
- internal_name: str | None = Field(default=None)
174
- """Internal name for tool (if any)."""
181
+ internal: JsonValue | None = Field(default=None)
182
+ """Model provider specific payload - typically used to aid transformation back to model types."""
175
183
 
176
184
  view: ToolCallContent | None = Field(default=None)
177
185
  """Custom view of tool call input."""
@@ -194,6 +202,12 @@ class ToolEvent(BaseEvent):
194
202
  working_time: float | None = Field(default=None)
195
203
  """Working time for tool call (i.e. time not spent waiting on semaphores)."""
196
204
 
205
+ agent: str | None = Field(default=None)
206
+ """Name of agent if the tool call was an agent handoff."""
207
+
208
+ failed: bool | None = Field(default=None)
209
+ """Did the tool call fail with a hard error?."""
210
+
197
211
  def _set_result(
198
212
  self,
199
213
  result: ToolResult,
@@ -201,6 +215,8 @@ class ToolEvent(BaseEvent):
201
215
  error: ToolCallError | None,
202
216
  events: list["Event"],
203
217
  waiting_time: float,
218
+ agent: str | None,
219
+ failed: bool | None,
204
220
  ) -> None:
205
221
  self.result = result
206
222
  self.truncated = truncated
@@ -210,6 +226,8 @@ class ToolEvent(BaseEvent):
210
226
  completed = datetime.now()
211
227
  self.completed = completed
212
228
  self.working_time = (completed - self.timestamp).total_seconds() - waiting_time
229
+ self.agent = agent
230
+ self.failed = failed
213
231
 
214
232
  # mechanism for operator to cancel the tool call
215
233
 
@@ -451,8 +469,11 @@ ET = TypeVar("ET", bound=BaseEvent)
451
469
  class Transcript:
452
470
  """Transcript of events."""
453
471
 
472
+ _event_logger: Callable[[Event], None] | None
473
+
454
474
  def __init__(self, name: str = "") -> None:
455
475
  self.name = name
476
+ self._event_logger = None
456
477
  self._events: list[Event] = []
457
478
 
458
479
  def info(self, data: JsonValue, *, source: str | None = None) -> None:
@@ -493,8 +514,17 @@ class Transcript:
493
514
  return None
494
515
 
495
516
  def _event(self, event: Event) -> None:
517
+ if self._event_logger:
518
+ self._event_logger(event)
496
519
  self._events.append(event)
497
520
 
521
+ def _event_updated(self, event: Event) -> None:
522
+ if self._event_logger:
523
+ self._event_logger(event)
524
+
525
+ def _subscribe(self, event_logger: Callable[[Event], None]) -> None:
526
+ self._event_logger = event_logger
527
+
498
528
 
499
529
  def transcript() -> Transcript:
500
530
  """Get the current `Transcript`."""
@@ -18,7 +18,7 @@ from ._cache import (
18
18
  cache_prune,
19
19
  cache_size,
20
20
  )
21
- from ._call_tools import call_tools
21
+ from ._call_tools import ExecuteToolsResult, call_tools, execute_tools
22
22
  from ._chat_message import (
23
23
  ChatMessage,
24
24
  ChatMessageAssistant,
@@ -27,6 +27,7 @@ from ._chat_message import (
27
27
  ChatMessageTool,
28
28
  ChatMessageUser,
29
29
  )
30
+ from ._conversation import ModelConversation
30
31
  from ._generate_config import GenerateConfig, GenerateConfigArgs, ResponseSchema
31
32
  from ._model import (
32
33
  Model,
@@ -34,6 +35,7 @@ from ._model import (
34
35
  ModelName,
35
36
  get_model,
36
37
  )
38
+ from ._model_call import ModelCall
37
39
  from ._model_output import (
38
40
  ChatCompletionChoice,
39
41
  Logprob,
@@ -64,7 +66,9 @@ __all__ = [
64
66
  "ChatMessageAssistant",
65
67
  "ChatMessageTool",
66
68
  "ChatCompletionChoice",
69
+ "ModelCall",
67
70
  "ModelOutput",
71
+ "ModelConversation",
68
72
  "Logprobs",
69
73
  "Logprob",
70
74
  "TopLogprob",
@@ -74,6 +78,8 @@ __all__ = [
74
78
  "ModelUsage",
75
79
  "StopReason",
76
80
  "call_tools",
81
+ "execute_tools",
82
+ "ExecuteToolsResult",
77
83
  "cache_clear",
78
84
  "cache_list_expired",
79
85
  "cache_path",