inspect-ai 0.3.81__py3-none-any.whl → 0.3.82__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (179) hide show
  1. inspect_ai/_cli/eval.py +35 -2
  2. inspect_ai/_cli/util.py +44 -1
  3. inspect_ai/_display/core/config.py +1 -1
  4. inspect_ai/_display/core/display.py +13 -4
  5. inspect_ai/_display/core/results.py +1 -1
  6. inspect_ai/_display/textual/widgets/task_detail.py +5 -4
  7. inspect_ai/_eval/eval.py +38 -1
  8. inspect_ai/_eval/evalset.py +5 -0
  9. inspect_ai/_eval/run.py +5 -2
  10. inspect_ai/_eval/task/log.py +53 -6
  11. inspect_ai/_eval/task/run.py +51 -10
  12. inspect_ai/_util/constants.py +2 -0
  13. inspect_ai/_util/file.py +17 -1
  14. inspect_ai/_util/json.py +36 -1
  15. inspect_ai/_view/server.py +113 -1
  16. inspect_ai/_view/www/App.css +1 -1
  17. inspect_ai/_view/www/dist/assets/index.css +518 -296
  18. inspect_ai/_view/www/dist/assets/index.js +38803 -36307
  19. inspect_ai/_view/www/eslint.config.mjs +1 -1
  20. inspect_ai/_view/www/log-schema.json +13 -0
  21. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  22. inspect_ai/_view/www/package.json +8 -2
  23. inspect_ai/_view/www/src/App.tsx +151 -855
  24. inspect_ai/_view/www/src/api/api-browser.ts +176 -5
  25. inspect_ai/_view/www/src/api/api-vscode.ts +75 -1
  26. inspect_ai/_view/www/src/api/client-api.ts +66 -10
  27. inspect_ai/_view/www/src/api/jsonrpc.ts +2 -0
  28. inspect_ai/_view/www/src/api/types.ts +107 -2
  29. inspect_ai/_view/www/src/appearance/icons.ts +1 -0
  30. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +3 -3
  31. inspect_ai/_view/www/src/components/DownloadPanel.tsx +2 -2
  32. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +56 -61
  33. inspect_ai/_view/www/src/components/FindBand.tsx +17 -9
  34. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +1 -1
  35. inspect_ai/_view/www/src/components/JsonPanel.tsx +14 -24
  36. inspect_ai/_view/www/src/components/LargeModal.tsx +2 -35
  37. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +27 -11
  38. inspect_ai/_view/www/src/components/LiveVirtualList.module.css +11 -0
  39. inspect_ai/_view/www/src/components/LiveVirtualList.tsx +177 -0
  40. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +3 -3
  41. inspect_ai/_view/www/src/components/MessageBand.tsx +14 -9
  42. inspect_ai/_view/www/src/components/MorePopOver.tsx +3 -3
  43. inspect_ai/_view/www/src/components/NavPills.tsx +20 -8
  44. inspect_ai/_view/www/src/components/NoContentsPanel.module.css +12 -0
  45. inspect_ai/_view/www/src/components/NoContentsPanel.tsx +20 -0
  46. inspect_ai/_view/www/src/components/ProgressBar.module.css +5 -4
  47. inspect_ai/_view/www/src/components/ProgressBar.tsx +3 -2
  48. inspect_ai/_view/www/src/components/PulsingDots.module.css +81 -0
  49. inspect_ai/_view/www/src/components/PulsingDots.tsx +45 -0
  50. inspect_ai/_view/www/src/components/TabSet.tsx +4 -37
  51. inspect_ai/_view/www/src/components/ToolButton.tsx +3 -4
  52. inspect_ai/_view/www/src/index.tsx +26 -94
  53. inspect_ai/_view/www/src/logfile/remoteLogFile.ts +9 -1
  54. inspect_ai/_view/www/src/logfile/remoteZipFile.ts +30 -4
  55. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +4 -6
  56. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +1 -1
  57. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +9 -1
  58. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +67 -28
  59. inspect_ai/_view/www/src/samples/SampleDialog.tsx +51 -22
  60. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +4 -0
  61. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +144 -90
  62. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +4 -0
  63. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +82 -35
  64. inspect_ai/_view/www/src/samples/SamplesTools.tsx +23 -30
  65. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -1
  66. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +1 -1
  67. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +45 -53
  68. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +4 -1
  69. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +3 -0
  70. inspect_ai/_view/www/src/samples/chat/messages.ts +34 -0
  71. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +3 -0
  72. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +10 -1
  73. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +22 -46
  74. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +25 -17
  75. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +2 -1
  76. inspect_ai/_view/www/src/samples/descriptor/types.ts +6 -5
  77. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +21 -3
  78. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +20 -1
  79. inspect_ai/_view/www/src/samples/list/SampleList.tsx +105 -85
  80. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +6 -0
  81. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +27 -14
  82. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +29 -18
  83. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +28 -28
  84. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +19 -9
  85. inspect_ai/_view/www/src/samples/sampleDataAdapter.ts +33 -0
  86. inspect_ai/_view/www/src/samples/sampleLimit.ts +2 -2
  87. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +7 -9
  88. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +7 -11
  89. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +0 -13
  90. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +0 -13
  91. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +0 -13
  92. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +4 -0
  93. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +10 -24
  94. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +0 -13
  95. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -22
  96. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +15 -24
  97. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +0 -13
  98. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +6 -28
  99. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +24 -34
  100. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +4 -0
  101. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +8 -13
  102. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +197 -338
  103. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.module.css +16 -0
  104. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.tsx +44 -0
  105. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +7 -4
  106. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +52 -58
  107. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.module.css +23 -0
  108. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.tsx +27 -0
  109. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +30 -1
  110. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +102 -72
  111. inspect_ai/_view/www/src/scoring/utils.ts +87 -0
  112. inspect_ai/_view/www/src/state/appSlice.ts +244 -0
  113. inspect_ai/_view/www/src/state/hooks.ts +397 -0
  114. inspect_ai/_view/www/src/state/logPolling.ts +196 -0
  115. inspect_ai/_view/www/src/state/logSlice.ts +214 -0
  116. inspect_ai/_view/www/src/state/logsPolling.ts +118 -0
  117. inspect_ai/_view/www/src/state/logsSlice.ts +181 -0
  118. inspect_ai/_view/www/src/state/samplePolling.ts +311 -0
  119. inspect_ai/_view/www/src/state/sampleSlice.ts +127 -0
  120. inspect_ai/_view/www/src/state/sampleUtils.ts +21 -0
  121. inspect_ai/_view/www/src/state/scrolling.ts +206 -0
  122. inspect_ai/_view/www/src/state/store.ts +168 -0
  123. inspect_ai/_view/www/src/state/store_filter.ts +84 -0
  124. inspect_ai/_view/www/src/state/utils.ts +23 -0
  125. inspect_ai/_view/www/src/storage/index.ts +26 -0
  126. inspect_ai/_view/www/src/types/log.d.ts +2 -0
  127. inspect_ai/_view/www/src/types.ts +94 -32
  128. inspect_ai/_view/www/src/utils/attachments.ts +58 -23
  129. inspect_ai/_view/www/src/utils/logger.ts +52 -0
  130. inspect_ai/_view/www/src/utils/polling.ts +100 -0
  131. inspect_ai/_view/www/src/utils/react.ts +30 -0
  132. inspect_ai/_view/www/src/utils/vscode.ts +1 -1
  133. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +181 -216
  134. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +11 -53
  135. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +8 -18
  136. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -0
  137. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +40 -22
  138. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +0 -1
  139. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +98 -39
  140. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.module.css +32 -0
  141. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.tsx +32 -0
  142. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +11 -13
  143. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +6 -2
  144. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +4 -4
  145. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +28 -13
  146. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +5 -10
  147. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +4 -4
  148. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.module.css +22 -0
  149. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.tsx +19 -0
  150. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +110 -115
  151. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +37 -5
  152. inspect_ai/_view/www/src/workspace/tabs/types.ts +4 -0
  153. inspect_ai/_view/www/src/workspace/types.ts +4 -3
  154. inspect_ai/_view/www/src/workspace/utils.ts +4 -4
  155. inspect_ai/_view/www/vite.config.js +6 -0
  156. inspect_ai/_view/www/yarn.lock +370 -354
  157. inspect_ai/log/_condense.py +26 -0
  158. inspect_ai/log/_log.py +6 -3
  159. inspect_ai/log/_recorders/buffer/__init__.py +14 -0
  160. inspect_ai/log/_recorders/buffer/buffer.py +30 -0
  161. inspect_ai/log/_recorders/buffer/database.py +685 -0
  162. inspect_ai/log/_recorders/buffer/filestore.py +259 -0
  163. inspect_ai/log/_recorders/buffer/types.py +84 -0
  164. inspect_ai/log/_recorders/eval.py +2 -11
  165. inspect_ai/log/_recorders/types.py +30 -0
  166. inspect_ai/log/_transcript.py +27 -1
  167. inspect_ai/model/_call_tools.py +1 -0
  168. inspect_ai/model/_generate_config.py +2 -2
  169. inspect_ai/model/_model.py +1 -0
  170. inspect_ai/tool/_tool_support_helpers.py +4 -4
  171. inspect_ai/tool/_tools/_web_browser/_web_browser.py +3 -1
  172. inspect_ai/util/_subtask.py +1 -0
  173. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/METADATA +1 -1
  174. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/RECORD +178 -138
  175. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +0 -22
  176. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/WHEEL +0 -0
  177. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/entry_points.txt +0 -0
  178. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/licenses/LICENSE +0 -0
  179. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,259 @@
1
+ import os
2
+ import tempfile
3
+ from logging import getLogger
4
+ from pathlib import Path
5
+ from typing import Literal
6
+ from zipfile import ZIP_DEFLATED, ZipFile
7
+
8
+ from pydantic import BaseModel, Field
9
+ from typing_extensions import override
10
+
11
+ from inspect_ai._display.core.display import TaskDisplayMetric
12
+ from inspect_ai._util.constants import DEFAULT_LOG_SHARED, EVAL_LOG_FORMAT
13
+ from inspect_ai._util.file import FileSystem, basename, dirname, file, filesystem
14
+ from inspect_ai._util.json import to_json_safe, to_json_str_safe
15
+ from inspect_ai.log._file import read_eval_log
16
+
17
+ from ..types import SampleSummary
18
+ from .types import SampleBuffer, SampleData, Samples
19
+
20
+ logger = getLogger(__name__)
21
+
22
+
23
+ class Segment(BaseModel):
24
+ id: int
25
+ last_event_id: int
26
+ last_attachment_id: int
27
+
28
+
29
+ class SegmentFile(BaseModel):
30
+ id: str | int
31
+ epoch: int
32
+ data: SampleData
33
+
34
+
35
+ class SampleManifest(BaseModel):
36
+ summary: SampleSummary
37
+ segments: list[int] = Field(default_factory=list)
38
+
39
+
40
+ class Manifest(BaseModel):
41
+ metrics: list[TaskDisplayMetric] = Field(default_factory=list)
42
+ samples: list[SampleManifest] = Field(default_factory=list)
43
+ segments: list[Segment] = Field(default_factory=list)
44
+
45
+
46
+ MANIFEST = "manifest.json"
47
+
48
+
49
+ class SampleBufferFilestore(SampleBuffer):
50
+ def __init__(
51
+ self,
52
+ location: str,
53
+ *,
54
+ create: bool = True,
55
+ update_interval: int = DEFAULT_LOG_SHARED,
56
+ ) -> None:
57
+ self._fs = filesystem(location)
58
+ self._dir = f"{sample_buffer_dir(dirname(location), self._fs)}{self._fs.sep}{os.path.splitext(basename(location))[0]}{self._fs.sep}"
59
+ self.update_interval = update_interval
60
+
61
+ if create:
62
+ self._fs.mkdir(self._dir, exist_ok=True)
63
+
64
+ # place a file in the dir to force it to be created
65
+ self._fs.touch(f"{self._dir}.keep")
66
+
67
+ def write_manifest(self, manifest: Manifest) -> None:
68
+ with file(self._manifest_file(), "wb") as f:
69
+ f.write(to_json_safe(manifest))
70
+
71
+ def write_segment(self, id: int, files: list[SegmentFile]) -> None:
72
+ # write the file locally
73
+ with tempfile.NamedTemporaryFile(mode="wb", delete=False) as segment_file:
74
+ name = segment_file.name
75
+ with ZipFile(
76
+ segment_file, mode="w", compression=ZIP_DEFLATED, compresslevel=5
77
+ ) as zip:
78
+ for sf in files:
79
+ zip.writestr(
80
+ segment_file_name(sf.id, sf.epoch),
81
+ to_json_str_safe(sf.data),
82
+ )
83
+ segment_file.flush()
84
+ os.fsync(segment_file.fileno())
85
+
86
+ # write then move for atomicity
87
+ try:
88
+ with open(name, "rb") as zf:
89
+ with file(f"{self._dir}{segment_name(id)}", "wb") as f:
90
+ f.write(zf.read())
91
+ f.flush()
92
+ finally:
93
+ os.unlink(name)
94
+
95
+ def read_manifest(self) -> Manifest | None:
96
+ try:
97
+ with file(self._manifest_file(), "r") as f:
98
+ contents = f.read()
99
+ return Manifest.model_validate_json(contents)
100
+ except FileNotFoundError:
101
+ return None
102
+
103
+ def read_segment_data(
104
+ self, id: int, sample_id: str | int, epoch_id: int
105
+ ) -> SampleData:
106
+ segment_file = f"{self._dir}{segment_name(id)}"
107
+ with file(segment_file, "rb") as f:
108
+ with ZipFile(f, mode="r") as zip:
109
+ with zip.open(segment_file_name(sample_id, epoch_id), "r") as sf:
110
+ return SampleData.model_validate_json(sf.read())
111
+
112
+ def cleanup(self) -> None:
113
+ cleanup_sample_buffer_filestore(self._dir, self._fs)
114
+
115
+ @classmethod
116
+ @override
117
+ def running_tasks(cls, log_dir: str) -> list[str] | None:
118
+ buffer_dir = Path(sample_buffer_dir(log_dir))
119
+ if buffer_dir.exists():
120
+ return [
121
+ f"{basename(path.name)}.{EVAL_LOG_FORMAT}"
122
+ for path in buffer_dir.iterdir()
123
+ if path.is_dir()
124
+ ]
125
+ else:
126
+ return None
127
+
128
+ @override
129
+ def get_samples(
130
+ self, etag: str | None = None
131
+ ) -> Samples | Literal["NotModified"] | None:
132
+ # get the etag on the filestore
133
+ try:
134
+ info = self._fs.info(self._manifest_file())
135
+ fs_etag = info.etag or f"{info.mtime}{info.size}"
136
+ except FileNotFoundError:
137
+ return None
138
+
139
+ # if the etag matches then return not modified
140
+ if etag == fs_etag:
141
+ return "NotModified"
142
+
143
+ # read the manifest
144
+ manifest = self.read_manifest()
145
+ if manifest is None:
146
+ return None
147
+
148
+ # provide samples + etag from the manifest
149
+ return Samples(
150
+ samples=[sm.summary for sm in manifest.samples],
151
+ metrics=manifest.metrics,
152
+ refresh=self.update_interval,
153
+ etag=fs_etag,
154
+ )
155
+
156
+ @override
157
+ def get_sample_data(
158
+ self,
159
+ id: str | int,
160
+ epoch: int,
161
+ after_event_id: int | None = None,
162
+ after_attachment_id: int | None = None,
163
+ ) -> SampleData | None:
164
+ # read the manifest
165
+ manifest = self.read_manifest()
166
+ if manifest is None:
167
+ return None
168
+
169
+ # find this sample in the manifest
170
+ sample = next(
171
+ (
172
+ sample
173
+ for sample in manifest.samples
174
+ if sample.summary.id == id and sample.summary.epoch == epoch
175
+ ),
176
+ None,
177
+ )
178
+ if sample is None:
179
+ return None
180
+
181
+ # determine which segments we need to return in order to
182
+ # satisfy the after_event_id and after_attachment_id
183
+ after_event_id = after_event_id or -1
184
+ after_attachment_id = after_attachment_id or -1
185
+ segments = [
186
+ segment for segment in manifest.segments if segment.id in sample.segments
187
+ ]
188
+ segments = [
189
+ segment
190
+ for segment in segments
191
+ if segment.last_event_id > after_event_id
192
+ or segment.last_attachment_id > after_attachment_id
193
+ ]
194
+
195
+ # collect data from the segments
196
+ sample_data = SampleData(events=[], attachments=[])
197
+ for segment in segments:
198
+ data = self.read_segment_data(segment.id, id, epoch)
199
+ sample_data.events.extend(data.events)
200
+ sample_data.attachments.extend(data.attachments)
201
+
202
+ return sample_data
203
+
204
+ def _manifest_file(self) -> str:
205
+ return f"{self._dir}{MANIFEST}"
206
+
207
+
208
+ def cleanup_sample_buffer_filestores(log_dir: str) -> None:
209
+ # read log buffer dirs (bail if there is no buffer_dir)
210
+ fs = filesystem(log_dir)
211
+ buffer_dir = sample_buffer_dir(log_dir, fs)
212
+ try:
213
+ log_buffers = [
214
+ buffer for buffer in fs.ls(buffer_dir) if buffer.type == "directory"
215
+ ]
216
+ except FileNotFoundError:
217
+ return
218
+
219
+ # for each buffer dir, confirm there is a running .eval file
220
+ # (remove the buffer dir if there is no .eval or the eval is finished)
221
+ for log_buffer in log_buffers:
222
+ try:
223
+ log_file = f"{log_dir}{fs.sep}{basename(log_buffer.name)}.{EVAL_LOG_FORMAT}"
224
+ log_header = read_eval_log(log_file, header_only=True)
225
+ if log_header.status != "started":
226
+ cleanup_sample_buffer_filestore(log_buffer.name, fs)
227
+
228
+ except FileNotFoundError:
229
+ cleanup_sample_buffer_filestore(log_buffer.name, fs)
230
+
231
+ # remove the .buffer dir if it's empty
232
+ try:
233
+ if len(fs.ls(buffer_dir)) == 0:
234
+ fs.rm(buffer_dir, recursive=True)
235
+ except FileNotFoundError:
236
+ pass
237
+
238
+
239
+ def cleanup_sample_buffer_filestore(buffer_dir: str, fs: FileSystem) -> None:
240
+ try:
241
+ fs.rm(buffer_dir, recursive=True)
242
+ except Exception as ex:
243
+ logger.warning(
244
+ f"Error cleaning up sample buffer database at {buffer_dir}: {ex}"
245
+ )
246
+
247
+
248
+ def segment_name(id: int) -> str:
249
+ return f"segment.{id}.zip"
250
+
251
+
252
+ def segment_file_name(id: str | int, epoch: int) -> str:
253
+ return f"{id}_{epoch}.json"
254
+
255
+
256
+ def sample_buffer_dir(log_dir: str, fs: FileSystem | None = None) -> str:
257
+ log_dir = log_dir.rstrip("/\\")
258
+ fs = fs or filesystem(log_dir)
259
+ return f"{log_dir}{fs.sep}.buffer"
@@ -0,0 +1,84 @@
1
+ import abc
2
+ from typing import Literal, TypeAlias
3
+
4
+ from pydantic import BaseModel, JsonValue
5
+
6
+ from inspect_ai._display.core.display import TaskDisplayMetric
7
+
8
+ from ..types import SampleSummary
9
+
10
+ JsonData: TypeAlias = dict[str, JsonValue]
11
+
12
+
13
+ class Samples(BaseModel):
14
+ samples: list[SampleSummary]
15
+ metrics: list[TaskDisplayMetric]
16
+ refresh: int
17
+ etag: str
18
+
19
+
20
+ class EventData(BaseModel):
21
+ id: int
22
+ event_id: str
23
+ sample_id: str
24
+ epoch: int
25
+ event: JsonData
26
+
27
+
28
+ class AttachmentData(BaseModel):
29
+ id: int
30
+ sample_id: str
31
+ epoch: int
32
+ hash: str
33
+ content: str
34
+
35
+
36
+ class SampleData(BaseModel):
37
+ events: list[EventData]
38
+ attachments: list[AttachmentData]
39
+
40
+
41
+ class SampleBuffer(abc.ABC):
42
+ @classmethod
43
+ @abc.abstractmethod
44
+ def running_tasks(cls, log_dir: str) -> list[str] | None: ...
45
+
46
+ @abc.abstractmethod
47
+ def get_samples(
48
+ self, etag: str | None = None
49
+ ) -> Samples | Literal["NotModified"] | None:
50
+ """Get the manifest of all running samples.
51
+
52
+ Args:
53
+ etag: Optional etag (returned in `Samples`) for checking
54
+ whether there are any changes in the database.
55
+
56
+ Returns:
57
+ - `Samples` if the database exists and has updates
58
+ - "NotModified" if the database exists and has no updates.
59
+ - None if the database no longer exists
60
+
61
+ """
62
+ ...
63
+
64
+ @abc.abstractmethod
65
+ def get_sample_data(
66
+ self,
67
+ id: str | int,
68
+ epoch: int,
69
+ after_event_id: int | None = None,
70
+ after_attachment_id: int | None = None,
71
+ ) -> SampleData | None:
72
+ """Get event and attachment data for a sample.
73
+
74
+ Args:
75
+ id: Sample id
76
+ epoch: Sample epoch
77
+ after_event_id: Optional. Fetch only event data greater than this id.
78
+ after_attachment_id: Optional. Fetch only attachment data greater than this id.
79
+
80
+ Returns:
81
+ - `SampleData` with event and attachment data.
82
+ - None if the database no longer exists
83
+ """
84
+ ...
@@ -23,7 +23,6 @@ from inspect_ai._util.file import FileSystem, dirname, file, filesystem
23
23
  from inspect_ai._util.json import jsonable_python
24
24
  from inspect_ai._util.trace import trace_action
25
25
  from inspect_ai.model._chat_message import ChatMessage
26
- from inspect_ai.scorer._metric import Score
27
26
 
28
27
  from .._log import (
29
28
  EvalLog,
@@ -36,20 +35,11 @@ from .._log import (
36
35
  sort_samples,
37
36
  )
38
37
  from .file import FileRecorder
38
+ from .types import SampleSummary
39
39
 
40
40
  logger = getLogger(__name__)
41
41
 
42
42
 
43
- class SampleSummary(BaseModel):
44
- id: int | str
45
- epoch: int
46
- input: str | list[ChatMessage]
47
- target: str | list[str]
48
- scores: dict[str, Score] | None = Field(default=None)
49
- error: str | None = Field(default=None)
50
- limit: str | None = Field(default=None)
51
-
52
-
53
43
  class LogStart(BaseModel):
54
44
  version: int
55
45
  eval: EvalSpec
@@ -331,6 +321,7 @@ class ZipLogFile:
331
321
  epoch=sample.epoch,
332
322
  input=text_inputs(sample.input),
333
323
  target=sample.target,
324
+ completed=True,
334
325
  scores=sample.scores,
335
326
  error=sample.error.message
336
327
  if sample.error is not None
@@ -0,0 +1,30 @@
1
+ from pydantic import BaseModel, Field, model_validator
2
+
3
+ from inspect_ai.log._transcript import Event
4
+ from inspect_ai.model._chat_message import ChatMessage
5
+ from inspect_ai.scorer._metric import Score
6
+
7
+
8
+ class SampleEvent(BaseModel):
9
+ id: str | int
10
+ epoch: int
11
+ event: Event
12
+
13
+
14
+ class SampleSummary(BaseModel):
15
+ id: int | str
16
+ epoch: int
17
+ input: str | list[ChatMessage]
18
+ target: str | list[str]
19
+ completed: bool = Field(default=False)
20
+ scores: dict[str, Score] | None = Field(default=None)
21
+ error: str | None = Field(default=None)
22
+ limit: str | None = Field(default=None)
23
+
24
+ @model_validator(mode="after")
25
+ def thin_scores(self) -> "SampleSummary":
26
+ if self.scores is not None:
27
+ self.scores = {
28
+ key: Score(value=score.value) for key, score in self.scores.items()
29
+ }
30
+ return self
@@ -14,7 +14,14 @@ from typing import (
14
14
  Union,
15
15
  )
16
16
 
17
- from pydantic import BaseModel, ConfigDict, Field, JsonValue, field_serializer
17
+ from pydantic import (
18
+ BaseModel,
19
+ ConfigDict,
20
+ Field,
21
+ JsonValue,
22
+ field_serializer,
23
+ )
24
+ from shortuuid import uuid
18
25
 
19
26
  from inspect_ai._util.constants import SAMPLE_SUBTASK
20
27
  from inspect_ai._util.error import EvalError
@@ -43,6 +50,13 @@ logger = getLogger(__name__)
43
50
 
44
51
 
45
52
  class BaseEvent(BaseModel):
53
+ model_config = {
54
+ "json_schema_extra": lambda schema: schema.get("properties", {}).pop(
55
+ "id_", None
56
+ )
57
+ }
58
+ id_: str = Field(default_factory=lambda: str(uuid()), exclude=True)
59
+
46
60
  timestamp: datetime = Field(default_factory=datetime.now)
47
61
  """Clock time at which event occurred."""
48
62
 
@@ -451,8 +465,11 @@ ET = TypeVar("ET", bound=BaseEvent)
451
465
  class Transcript:
452
466
  """Transcript of events."""
453
467
 
468
+ _event_logger: Callable[[Event], None] | None
469
+
454
470
  def __init__(self, name: str = "") -> None:
455
471
  self.name = name
472
+ self._event_logger = None
456
473
  self._events: list[Event] = []
457
474
 
458
475
  def info(self, data: JsonValue, *, source: str | None = None) -> None:
@@ -493,8 +510,17 @@ class Transcript:
493
510
  return None
494
511
 
495
512
  def _event(self, event: Event) -> None:
513
+ if self._event_logger:
514
+ self._event_logger(event)
496
515
  self._events.append(event)
497
516
 
517
+ def _event_updated(self, event: Event) -> None:
518
+ if self._event_logger:
519
+ self._event_logger(event)
520
+
521
+ def _subscribe(self, event_logger: Callable[[Event], None]) -> None:
522
+ self._event_logger = event_logger
523
+
498
524
 
499
525
  def transcript() -> Transcript:
500
526
  """Get the current `Transcript`."""
@@ -262,6 +262,7 @@ async def call_tools(
262
262
  events=result_event.events,
263
263
  waiting_time=waiting_time_end - waiting_time_start,
264
264
  )
265
+ transcript()._event_updated(event)
265
266
 
266
267
  # return tool messages
267
268
  return tool_messages
@@ -92,7 +92,7 @@ class GenerateConfigArgs(TypedDict, total=False):
92
92
  """Whether to cache the prompt prefix. Defaults to "auto", which will enable caching for requests with tools. Anthropic only."""
93
93
 
94
94
  reasoning_effort: Literal["low", "medium", "high"] | None
95
- """Constrains effort on reasoning for reasoning models. Open AI o1 models only."""
95
+ """Constrains effort on reasoning for reasoning models (defaults to `medium`). Open AI o1 models only."""
96
96
 
97
97
  reasoning_tokens: int | None
98
98
  """Maximum number of tokens to use for reasoning. Anthropic Claude models only."""
@@ -171,7 +171,7 @@ class GenerateConfig(BaseModel):
171
171
  """Whether to cache the prompt prefix. Defaults to "auto", which will enable caching for requests with tools. Anthropic only."""
172
172
 
173
173
  reasoning_effort: Literal["low", "medium", "high"] | None = Field(default=None)
174
- """Constrains effort on reasoning for reasoning models. Open AI o1 models only."""
174
+ """Constrains effort on reasoning for reasoning models (defaults to `medium`). Open AI o1 models only."""
175
175
 
176
176
  reasoning_tokens: int | None = Field(default=None)
177
177
  """Maximum number of tokens to use for reasoning. Anthropic Claude models only."""
@@ -672,6 +672,7 @@ class Model:
672
672
 
673
673
  event.call = updated_call
674
674
  event.pending = None
675
+ transcript()._event_updated(event)
675
676
 
676
677
  # if we have output then complete it now
677
678
  if output:
@@ -128,10 +128,10 @@ async def tool_container_sandbox(tool_name: str) -> SandboxEnvironment:
128
128
 
129
129
  Alternatively, you can include the service into your own Dockerfile:
130
130
 
131
- RUN python -m venv /opt/inspect_tool_support
132
- ENV PATH="/opt/inspect_tool_support/bin:$PATH"
133
- RUN pip install inspect-tool-support
134
- RUN inspect-tool-support post-install
131
+ ENV PATH="$PATH:/opt/inspect_tool_support/bin"
132
+ RUN python -m venv /opt/inspect_tool_support && \
133
+ /opt/inspect_tool_support/bin/pip install inspect-tool-support && \
134
+ /opt/inspect_tool_support/bin/inspect-tool-support post-install
135
135
  """).strip()
136
136
  raise PrerequisiteError(msg)
137
137
 
@@ -363,7 +363,9 @@ async def _web_browser_cmd(tool_name: str, params: dict[str, object]) -> ToolRes
363
363
  # The user may have the old, incompatible, sandbox. If so, use that and
364
364
  # execute the old compatible code.
365
365
  try:
366
- return await old_web_browser_cmd(tool_name, *params)
366
+ return await old_web_browser_cmd(
367
+ tool_name, *(str(value) for value in params.values())
368
+ )
367
369
  except PrerequisiteError:
368
370
  raise e
369
371
 
@@ -152,6 +152,7 @@ def subtask(
152
152
  event.result = result
153
153
  event.events = events
154
154
  event.pending = None
155
+ transcript()._event_updated(event)
155
156
 
156
157
  # return result
157
158
  return result
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: inspect_ai
3
- Version: 0.3.81
3
+ Version: 0.3.82
4
4
  Summary: Framework for large language model evaluations
5
5
  Author: UK AI Security Institute
6
6
  License: MIT License