inspect-ai 0.3.81__py3-none-any.whl → 0.3.82__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +35 -2
- inspect_ai/_cli/util.py +44 -1
- inspect_ai/_display/core/config.py +1 -1
- inspect_ai/_display/core/display.py +13 -4
- inspect_ai/_display/core/results.py +1 -1
- inspect_ai/_display/textual/widgets/task_detail.py +5 -4
- inspect_ai/_eval/eval.py +38 -1
- inspect_ai/_eval/evalset.py +5 -0
- inspect_ai/_eval/run.py +5 -2
- inspect_ai/_eval/task/log.py +53 -6
- inspect_ai/_eval/task/run.py +51 -10
- inspect_ai/_util/constants.py +2 -0
- inspect_ai/_util/file.py +17 -1
- inspect_ai/_util/json.py +36 -1
- inspect_ai/_view/server.py +113 -1
- inspect_ai/_view/www/App.css +1 -1
- inspect_ai/_view/www/dist/assets/index.css +518 -296
- inspect_ai/_view/www/dist/assets/index.js +38803 -36307
- inspect_ai/_view/www/eslint.config.mjs +1 -1
- inspect_ai/_view/www/log-schema.json +13 -0
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
- inspect_ai/_view/www/package.json +8 -2
- inspect_ai/_view/www/src/App.tsx +151 -855
- inspect_ai/_view/www/src/api/api-browser.ts +176 -5
- inspect_ai/_view/www/src/api/api-vscode.ts +75 -1
- inspect_ai/_view/www/src/api/client-api.ts +66 -10
- inspect_ai/_view/www/src/api/jsonrpc.ts +2 -0
- inspect_ai/_view/www/src/api/types.ts +107 -2
- inspect_ai/_view/www/src/appearance/icons.ts +1 -0
- inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +3 -3
- inspect_ai/_view/www/src/components/DownloadPanel.tsx +2 -2
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +56 -61
- inspect_ai/_view/www/src/components/FindBand.tsx +17 -9
- inspect_ai/_view/www/src/components/HumanBaselineView.tsx +1 -1
- inspect_ai/_view/www/src/components/JsonPanel.tsx +14 -24
- inspect_ai/_view/www/src/components/LargeModal.tsx +2 -35
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +27 -11
- inspect_ai/_view/www/src/components/LiveVirtualList.module.css +11 -0
- inspect_ai/_view/www/src/components/LiveVirtualList.tsx +177 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +3 -3
- inspect_ai/_view/www/src/components/MessageBand.tsx +14 -9
- inspect_ai/_view/www/src/components/MorePopOver.tsx +3 -3
- inspect_ai/_view/www/src/components/NavPills.tsx +20 -8
- inspect_ai/_view/www/src/components/NoContentsPanel.module.css +12 -0
- inspect_ai/_view/www/src/components/NoContentsPanel.tsx +20 -0
- inspect_ai/_view/www/src/components/ProgressBar.module.css +5 -4
- inspect_ai/_view/www/src/components/ProgressBar.tsx +3 -2
- inspect_ai/_view/www/src/components/PulsingDots.module.css +81 -0
- inspect_ai/_view/www/src/components/PulsingDots.tsx +45 -0
- inspect_ai/_view/www/src/components/TabSet.tsx +4 -37
- inspect_ai/_view/www/src/components/ToolButton.tsx +3 -4
- inspect_ai/_view/www/src/index.tsx +26 -94
- inspect_ai/_view/www/src/logfile/remoteLogFile.ts +9 -1
- inspect_ai/_view/www/src/logfile/remoteZipFile.ts +30 -4
- inspect_ai/_view/www/src/metadata/RenderedContent.tsx +4 -6
- inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +1 -1
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +9 -1
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +67 -28
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +51 -22
- inspect_ai/_view/www/src/samples/SampleDisplay.module.css +4 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +144 -90
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +4 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +82 -35
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +23 -30
- inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +1 -1
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +45 -53
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +4 -1
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +3 -0
- inspect_ai/_view/www/src/samples/chat/messages.ts +34 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +3 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +10 -1
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +22 -46
- inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +25 -17
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +2 -1
- inspect_ai/_view/www/src/samples/descriptor/types.ts +6 -5
- inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +21 -3
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +20 -1
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +105 -85
- inspect_ai/_view/www/src/samples/list/SampleRow.module.css +6 -0
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +27 -14
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +29 -18
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +28 -28
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +19 -9
- inspect_ai/_view/www/src/samples/sampleDataAdapter.ts +33 -0
- inspect_ai/_view/www/src/samples/sampleLimit.ts +2 -2
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +7 -9
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +7 -11
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +4 -0
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +10 -24
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -22
- inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +15 -24
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +6 -28
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +24 -34
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +4 -0
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +8 -13
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +197 -338
- inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.module.css +16 -0
- inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.tsx +44 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +7 -4
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +52 -58
- inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.module.css +23 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.tsx +27 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +30 -1
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +102 -72
- inspect_ai/_view/www/src/scoring/utils.ts +87 -0
- inspect_ai/_view/www/src/state/appSlice.ts +244 -0
- inspect_ai/_view/www/src/state/hooks.ts +397 -0
- inspect_ai/_view/www/src/state/logPolling.ts +196 -0
- inspect_ai/_view/www/src/state/logSlice.ts +214 -0
- inspect_ai/_view/www/src/state/logsPolling.ts +118 -0
- inspect_ai/_view/www/src/state/logsSlice.ts +181 -0
- inspect_ai/_view/www/src/state/samplePolling.ts +311 -0
- inspect_ai/_view/www/src/state/sampleSlice.ts +127 -0
- inspect_ai/_view/www/src/state/sampleUtils.ts +21 -0
- inspect_ai/_view/www/src/state/scrolling.ts +206 -0
- inspect_ai/_view/www/src/state/store.ts +168 -0
- inspect_ai/_view/www/src/state/store_filter.ts +84 -0
- inspect_ai/_view/www/src/state/utils.ts +23 -0
- inspect_ai/_view/www/src/storage/index.ts +26 -0
- inspect_ai/_view/www/src/types/log.d.ts +2 -0
- inspect_ai/_view/www/src/types.ts +94 -32
- inspect_ai/_view/www/src/utils/attachments.ts +58 -23
- inspect_ai/_view/www/src/utils/logger.ts +52 -0
- inspect_ai/_view/www/src/utils/polling.ts +100 -0
- inspect_ai/_view/www/src/utils/react.ts +30 -0
- inspect_ai/_view/www/src/utils/vscode.ts +1 -1
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +181 -216
- inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +11 -53
- inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +8 -18
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -0
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +40 -22
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +0 -1
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +98 -39
- inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.module.css +32 -0
- inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.tsx +32 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +11 -13
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +6 -2
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +4 -4
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +28 -13
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +5 -10
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +4 -4
- inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.module.css +22 -0
- inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.tsx +19 -0
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +110 -115
- inspect_ai/_view/www/src/workspace/tabs/grouping.ts +37 -5
- inspect_ai/_view/www/src/workspace/tabs/types.ts +4 -0
- inspect_ai/_view/www/src/workspace/types.ts +4 -3
- inspect_ai/_view/www/src/workspace/utils.ts +4 -4
- inspect_ai/_view/www/vite.config.js +6 -0
- inspect_ai/_view/www/yarn.lock +370 -354
- inspect_ai/log/_condense.py +26 -0
- inspect_ai/log/_log.py +6 -3
- inspect_ai/log/_recorders/buffer/__init__.py +14 -0
- inspect_ai/log/_recorders/buffer/buffer.py +30 -0
- inspect_ai/log/_recorders/buffer/database.py +685 -0
- inspect_ai/log/_recorders/buffer/filestore.py +259 -0
- inspect_ai/log/_recorders/buffer/types.py +84 -0
- inspect_ai/log/_recorders/eval.py +2 -11
- inspect_ai/log/_recorders/types.py +30 -0
- inspect_ai/log/_transcript.py +27 -1
- inspect_ai/model/_call_tools.py +1 -0
- inspect_ai/model/_generate_config.py +2 -2
- inspect_ai/model/_model.py +1 -0
- inspect_ai/tool/_tool_support_helpers.py +4 -4
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +3 -1
- inspect_ai/util/_subtask.py +1 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/RECORD +178 -138
- inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +0 -22
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,259 @@
|
|
1
|
+
import os
|
2
|
+
import tempfile
|
3
|
+
from logging import getLogger
|
4
|
+
from pathlib import Path
|
5
|
+
from typing import Literal
|
6
|
+
from zipfile import ZIP_DEFLATED, ZipFile
|
7
|
+
|
8
|
+
from pydantic import BaseModel, Field
|
9
|
+
from typing_extensions import override
|
10
|
+
|
11
|
+
from inspect_ai._display.core.display import TaskDisplayMetric
|
12
|
+
from inspect_ai._util.constants import DEFAULT_LOG_SHARED, EVAL_LOG_FORMAT
|
13
|
+
from inspect_ai._util.file import FileSystem, basename, dirname, file, filesystem
|
14
|
+
from inspect_ai._util.json import to_json_safe, to_json_str_safe
|
15
|
+
from inspect_ai.log._file import read_eval_log
|
16
|
+
|
17
|
+
from ..types import SampleSummary
|
18
|
+
from .types import SampleBuffer, SampleData, Samples
|
19
|
+
|
20
|
+
logger = getLogger(__name__)
|
21
|
+
|
22
|
+
|
23
|
+
class Segment(BaseModel):
    # Manifest record describing one segment archive of the buffer.

    # numeric segment id (the archive is named via `segment_name(id)`)
    id: int
    # compared against `after_event_id` when choosing segments to read
    last_event_id: int
    # compared against `after_attachment_id` when choosing segments to read
    last_attachment_id: int
|
27
|
+
|
28
|
+
|
29
|
+
class SegmentFile(BaseModel):
    # One sample's payload to be stored as an entry inside a segment archive.

    # sample id and epoch: together they name the archive entry
    # (see `segment_file_name(id, epoch)`)
    id: str | int
    epoch: int
    # events and attachments serialized to JSON as the entry's content
    data: SampleData
|
33
|
+
|
34
|
+
|
35
|
+
class SampleManifest(BaseModel):
    # Manifest entry for a single sample.

    # summary of the sample (carries the id/epoch used for lookups)
    summary: SampleSummary
    # ids of the segments that contain data for this sample
    segments: list[int] = Field(default_factory=list)
|
38
|
+
|
39
|
+
|
40
|
+
class Manifest(BaseModel):
    # Top-level index of a sample buffer (stored as `manifest.json`).

    # metrics passed through to readers via `Samples.metrics`
    metrics: list[TaskDisplayMetric] = Field(default_factory=list)
    # one entry per sample, each listing the segments it appears in
    samples: list[SampleManifest] = Field(default_factory=list)
    # all segments written so far
    segments: list[Segment] = Field(default_factory=list)
|
44
|
+
|
45
|
+
|
46
|
+
MANIFEST = "manifest.json"
|
47
|
+
|
48
|
+
|
49
|
+
class SampleBufferFilestore(SampleBuffer):
    """Sample buffer stored as a manifest plus zipped segments on a filesystem.

    Data for a log is kept under `<log dir>/.buffer/<log name>/` (log name
    without its extension): a `manifest.json` file indexing samples and
    segments, and one `segment.<id>.zip` archive per segment containing a
    `<sample id>_<epoch>.json` entry for each sample in that segment.
    """

    def __init__(
        self,
        location: str,
        *,
        create: bool = True,
        update_interval: int = DEFAULT_LOG_SHARED,
    ) -> None:
        """Bind the buffer to the log file at `location`.

        Args:
            location: Location of the log file this buffer belongs to.
            create: Create the buffer directory (and a `.keep` file in it).
            update_interval: Value reported as `refresh` by `get_samples()`.
        """
        self._fs = filesystem(location)
        buffer_root = sample_buffer_dir(dirname(location), self._fs)
        log_name = os.path.splitext(basename(location))[0]
        sep = self._fs.sep
        # keep a trailing separator so file names can be appended directly
        self._dir = f"{buffer_root}{sep}{log_name}{sep}"
        self.update_interval = update_interval

        if create:
            self._fs.mkdir(self._dir, exist_ok=True)

            # place a file in the dir to force it to be created
            self._fs.touch(f"{self._dir}.keep")

    def write_manifest(self, manifest: Manifest) -> None:
        """Serialize `manifest` to JSON and write it to the manifest file."""
        with file(self._manifest_file(), "wb") as f:
            f.write(to_json_safe(manifest))

    def write_segment(self, id: int, files: list[SegmentFile]) -> None:
        """Write a segment archive containing one JSON entry per segment file.

        The archive is first built in a local temporary file and then copied
        to `segment.<id>.zip` in the buffer directory. The temporary file is
        removed whether or not building or copying succeeds.

        Args:
            id: Segment id (determines the archive name).
            files: Per-sample payloads to store in the archive.
        """
        # delete=False because the file is re-opened by name below; that makes
        # us responsible for removing it on every path, including failures
        # while the archive is still being built
        segment_file = tempfile.NamedTemporaryFile(mode="wb", delete=False)
        name = segment_file.name
        try:
            # build the archive locally
            with segment_file:
                with ZipFile(
                    segment_file, mode="w", compression=ZIP_DEFLATED, compresslevel=5
                ) as archive:
                    for sf in files:
                        archive.writestr(
                            segment_file_name(sf.id, sf.epoch),
                            to_json_str_safe(sf.data),
                        )
                segment_file.flush()
                os.fsync(segment_file.fileno())

            # copy the finished archive to its destination in a single write
            with open(name, "rb") as zf:
                with file(f"{self._dir}{segment_name(id)}", "wb") as f:
                    f.write(zf.read())
                    f.flush()
        finally:
            os.unlink(name)

    def read_manifest(self) -> Manifest | None:
        """Read and validate the manifest.

        Returns:
            The parsed `Manifest`, or None if the manifest file does not exist.
        """
        try:
            with file(self._manifest_file(), "r") as f:
                contents = f.read()
                return Manifest.model_validate_json(contents)
        except FileNotFoundError:
            return None

    def read_segment_data(
        self, id: int, sample_id: str | int, epoch_id: int
    ) -> SampleData:
        """Read one sample's data out of a segment archive.

        Args:
            id: Segment id.
            sample_id: Sample id (part of the archive entry name).
            epoch_id: Sample epoch (part of the archive entry name).

        Returns:
            The `SampleData` stored for the sample in that segment.
        """
        segment_file = f"{self._dir}{segment_name(id)}"
        with file(segment_file, "rb") as f:
            with ZipFile(f, mode="r") as archive:
                entry_name = segment_file_name(sample_id, epoch_id)
                with archive.open(entry_name, "r") as sf:
                    return SampleData.model_validate_json(sf.read())

    def cleanup(self) -> None:
        """Remove this buffer's directory (errors are logged, not raised)."""
        cleanup_sample_buffer_filestore(self._dir, self._fs)

    @classmethod
    @override
    def running_tasks(cls, log_dir: str) -> list[str] | None:
        """List log file names that have a buffer directory under `log_dir`.

        Returns:
            One `<name>.<EVAL_LOG_FORMAT>` entry per sub-directory of the
            buffer directory, or None if the buffer directory does not exist.
        """
        # NOTE(review): this goes through pathlib rather than the filesystem
        # abstraction, so it presumably only sees local directories -- confirm.
        buffer_dir = Path(sample_buffer_dir(log_dir))
        if not buffer_dir.exists():
            return None

        return [
            f"{basename(path.name)}.{EVAL_LOG_FORMAT}"
            for path in buffer_dir.iterdir()
            if path.is_dir()
        ]

    @override
    def get_samples(
        self, etag: str | None = None
    ) -> Samples | Literal["NotModified"] | None:
        """Get sample summaries and metrics from the manifest.

        Args:
            etag: Etag from a previous `Samples` result; if it still matches
                the manifest file, "NotModified" is returned.

        Returns:
            `Samples` if the manifest exists and differs from `etag`,
            "NotModified" if the etag matches, or None if there is no manifest.
        """
        # get the etag on the filestore (fall back to mtime + size when the
        # filesystem does not report one)
        try:
            info = self._fs.info(self._manifest_file())
            fs_etag = info.etag or f"{info.mtime}{info.size}"
        except FileNotFoundError:
            return None

        # if the etag matches then return not modified
        if etag == fs_etag:
            return "NotModified"

        # read the manifest (it may have been removed since the info call)
        manifest = self.read_manifest()
        if manifest is None:
            return None

        # provide samples + etag from the manifest
        return Samples(
            samples=[sm.summary for sm in manifest.samples],
            metrics=manifest.metrics,
            refresh=self.update_interval,
            etag=fs_etag,
        )

    @override
    def get_sample_data(
        self,
        id: str | int,
        epoch: int,
        after_event_id: int | None = None,
        after_attachment_id: int | None = None,
    ) -> SampleData | None:
        """Get event and attachment data for a sample.

        Segments are selected when their last event id or last attachment id
        is greater than the requested id, and are returned whole: the result
        can therefore include items at or below the requested ids when they
        share a segment with newer ones.

        Args:
            id: Sample id.
            epoch: Sample epoch.
            after_event_id: Optional. Select segments with events after this id.
            after_attachment_id: Optional. Select segments with attachments
                after this id.

        Returns:
            `SampleData` with the collected events and attachments, or None if
            the manifest does not exist or does not list the sample.
        """
        # read the manifest
        manifest = self.read_manifest()
        if manifest is None:
            return None

        # find this sample in the manifest
        sample = next(
            (
                candidate
                for candidate in manifest.samples
                if candidate.summary.id == id and candidate.summary.epoch == epoch
            ),
            None,
        )
        if sample is None:
            return None

        # -1 means "everything"; only substitute it when no id was passed
        # (`value or -1` would also discard an explicit 0)
        if after_event_id is None:
            after_event_id = -1
        if after_attachment_id is None:
            after_attachment_id = -1

        # determine which of the sample's segments are needed to satisfy
        # after_event_id and after_attachment_id
        sample_segment_ids = set(sample.segments)
        segments = [
            segment
            for segment in manifest.segments
            if segment.id in sample_segment_ids
            and (
                segment.last_event_id > after_event_id
                or segment.last_attachment_id > after_attachment_id
            )
        ]

        # collect data from the segments
        sample_data = SampleData(events=[], attachments=[])
        for segment in segments:
            data = self.read_segment_data(segment.id, id, epoch)
            sample_data.events.extend(data.events)
            sample_data.attachments.extend(data.attachments)

        return sample_data

    def _manifest_file(self) -> str:
        """Return the full path of the manifest file in the buffer directory."""
        return f"{self._dir}{MANIFEST}"
|
206
|
+
|
207
|
+
|
208
|
+
def cleanup_sample_buffer_filestores(log_dir: str) -> None:
    """Remove buffer directories under `log_dir` that have no running log.

    A buffer directory is removed when its `.eval` log file is missing or has
    a status other than "started". The `.buffer` directory itself is removed
    afterwards if nothing is left in it.
    """
    fs = filesystem(log_dir)
    buffer_dir = sample_buffer_dir(log_dir, fs)

    # enumerate the per-log buffer directories (nothing to do without a
    # buffer dir)
    try:
        log_buffers = [
            entry for entry in fs.ls(buffer_dir) if entry.type == "directory"
        ]
    except FileNotFoundError:
        return

    for log_buffer in log_buffers:
        log_file = f"{log_dir}{fs.sep}{basename(log_buffer.name)}.{EVAL_LOG_FORMAT}"

        # stale == log file is gone, or the eval is no longer running
        try:
            header = read_eval_log(log_file, header_only=True)
            stale = header.status != "started"
        except FileNotFoundError:
            stale = True

        if stale:
            cleanup_sample_buffer_filestore(log_buffer.name, fs)

    # remove the .buffer dir if it's empty
    try:
        if not fs.ls(buffer_dir):
            fs.rm(buffer_dir, recursive=True)
    except FileNotFoundError:
        pass
|
237
|
+
|
238
|
+
|
239
|
+
def cleanup_sample_buffer_filestore(buffer_dir: str, fs: FileSystem) -> None:
    """Recursively remove `buffer_dir`, logging a warning instead of raising."""
    try:
        fs.rm(buffer_dir, recursive=True)
    except Exception as ex:
        # best effort: a failed cleanup is reported but never propagated
        message = f"Error cleaning up sample buffer database at {buffer_dir}: {ex}"
        logger.warning(message)
|
246
|
+
|
247
|
+
|
248
|
+
def segment_name(id: int) -> str:
    """Return the archive file name for segment `id` (`segment.<id>.zip`)."""
    archive_name = f"segment.{id}.zip"
    return archive_name
|
250
|
+
|
251
|
+
|
252
|
+
def segment_file_name(id: str | int, epoch: int) -> str:
|
253
|
+
return f"{id}_{epoch}.json"
|
254
|
+
|
255
|
+
|
256
|
+
def sample_buffer_dir(log_dir: str, fs: FileSystem | None = None) -> str:
    """Return the `.buffer` directory path for `log_dir`.

    Args:
        log_dir: Log directory (trailing `/` and `\\` characters are ignored).
        fs: Filesystem supplying the path separator; resolved from `log_dir`
            when not provided.
    """
    trimmed = log_dir.rstrip("/\\")
    separator = (fs or filesystem(trimmed)).sep
    return f"{trimmed}{separator}.buffer"
|
@@ -0,0 +1,84 @@
|
|
1
|
+
import abc
|
2
|
+
from typing import Literal, TypeAlias
|
3
|
+
|
4
|
+
from pydantic import BaseModel, JsonValue
|
5
|
+
|
6
|
+
from inspect_ai._display.core.display import TaskDisplayMetric
|
7
|
+
|
8
|
+
from ..types import SampleSummary
|
9
|
+
|
10
|
+
JsonData: TypeAlias = dict[str, JsonValue]
|
11
|
+
|
12
|
+
|
13
|
+
class Samples(BaseModel):
    # Result of `SampleBuffer.get_samples()`.

    # summaries of the samples known to the buffer
    samples: list[SampleSummary]
    # current task metrics
    metrics: list[TaskDisplayMetric]
    # refresh interval hint for readers (units not stated here -- TODO confirm)
    refresh: int
    # token to pass back to `get_samples()` to detect "no changes"
    etag: str
|
18
|
+
|
19
|
+
|
20
|
+
class EventData(BaseModel):
    # A single buffered event belonging to a sample.

    # integer id (presumably what `after_event_id` is compared to -- confirm)
    id: int
    # string identifier of the event itself
    event_id: str
    # sample and epoch the event belongs to
    sample_id: str
    epoch: int
    # event payload as JSON data
    event: JsonData
|
26
|
+
|
27
|
+
|
28
|
+
class AttachmentData(BaseModel):
    # A single buffered attachment belonging to a sample.

    # integer id (presumably what `after_attachment_id` is compared to -- confirm)
    id: int
    # sample and epoch the attachment belongs to
    sample_id: str
    epoch: int
    # hash identifying the attachment, and the attachment content itself
    hash: str
    content: str
|
34
|
+
|
35
|
+
|
36
|
+
class SampleData(BaseModel):
    # Events and attachments returned for one sample.

    events: list[EventData]
    attachments: list[AttachmentData]
|
39
|
+
|
40
|
+
|
41
|
+
class SampleBuffer(abc.ABC):
    """Abstract interface for reading the samples of running tasks."""

    @classmethod
    @abc.abstractmethod
    def running_tasks(cls, log_dir: str) -> list[str] | None:
        """List the running tasks that have buffered data for `log_dir`."""
        ...

    @abc.abstractmethod
    def get_samples(
        self, etag: str | None = None
    ) -> Samples | Literal["NotModified"] | None:
        """Get the manifest of all running samples.

        Args:
            etag: Optional etag (returned in `Samples`) for checking
                whether there are any changes in the database.

        Returns:
            - `Samples` if the database exists and has updates.
            - "NotModified" if the database exists and has no updates.
            - None if the database no longer exists.

        """
        ...

    @abc.abstractmethod
    def get_sample_data(
        self,
        id: str | int,
        epoch: int,
        after_event_id: int | None = None,
        after_attachment_id: int | None = None,
    ) -> SampleData | None:
        """Get event and attachment data for a sample.

        Args:
            id: Sample id.
            epoch: Sample epoch.
            after_event_id: Optional. Fetch only event data greater than this id.
            after_attachment_id: Optional. Fetch only attachment data greater
                than this id.

        Returns:
            - `SampleData` with event and attachment data.
            - None if the database no longer exists.
        """
        ...
|
@@ -23,7 +23,6 @@ from inspect_ai._util.file import FileSystem, dirname, file, filesystem
|
|
23
23
|
from inspect_ai._util.json import jsonable_python
|
24
24
|
from inspect_ai._util.trace import trace_action
|
25
25
|
from inspect_ai.model._chat_message import ChatMessage
|
26
|
-
from inspect_ai.scorer._metric import Score
|
27
26
|
|
28
27
|
from .._log import (
|
29
28
|
EvalLog,
|
@@ -36,20 +35,11 @@ from .._log import (
|
|
36
35
|
sort_samples,
|
37
36
|
)
|
38
37
|
from .file import FileRecorder
|
38
|
+
from .types import SampleSummary
|
39
39
|
|
40
40
|
logger = getLogger(__name__)
|
41
41
|
|
42
42
|
|
43
|
-
class SampleSummary(BaseModel):
|
44
|
-
id: int | str
|
45
|
-
epoch: int
|
46
|
-
input: str | list[ChatMessage]
|
47
|
-
target: str | list[str]
|
48
|
-
scores: dict[str, Score] | None = Field(default=None)
|
49
|
-
error: str | None = Field(default=None)
|
50
|
-
limit: str | None = Field(default=None)
|
51
|
-
|
52
|
-
|
53
43
|
class LogStart(BaseModel):
|
54
44
|
version: int
|
55
45
|
eval: EvalSpec
|
@@ -331,6 +321,7 @@ class ZipLogFile:
|
|
331
321
|
epoch=sample.epoch,
|
332
322
|
input=text_inputs(sample.input),
|
333
323
|
target=sample.target,
|
324
|
+
completed=True,
|
334
325
|
scores=sample.scores,
|
335
326
|
error=sample.error.message
|
336
327
|
if sample.error is not None
|
@@ -0,0 +1,30 @@
|
|
1
|
+
from pydantic import BaseModel, Field, model_validator
|
2
|
+
|
3
|
+
from inspect_ai.log._transcript import Event
|
4
|
+
from inspect_ai.model._chat_message import ChatMessage
|
5
|
+
from inspect_ai.scorer._metric import Score
|
6
|
+
|
7
|
+
|
8
|
+
class SampleEvent(BaseModel):
    # An event paired with the sample (id + epoch) it belongs to.

    id: str | int
    epoch: int
    event: Event
|
12
|
+
|
13
|
+
|
14
|
+
class SampleSummary(BaseModel):
    # Lightweight summary of a sample.

    id: int | str
    epoch: int
    input: str | list[ChatMessage]
    target: str | list[str]
    # False unless explicitly set
    completed: bool = Field(default=False)
    # scores keyed by name (reduced to their values by `thin_scores`)
    scores: dict[str, Score] | None = Field(default=None)
    error: str | None = Field(default=None)
    limit: str | None = Field(default=None)

    @model_validator(mode="after")
    def thin_scores(self) -> "SampleSummary":
        # nothing to thin when there are no scores
        if self.scores is None:
            return self

        # rebuild each score from its value only, dropping every other field
        thinned: dict[str, Score] = {}
        for name, score in self.scores.items():
            thinned[name] = Score(value=score.value)
        self.scores = thinned
        return self
|
inspect_ai/log/_transcript.py
CHANGED
@@ -14,7 +14,14 @@ from typing import (
|
|
14
14
|
Union,
|
15
15
|
)
|
16
16
|
|
17
|
-
from pydantic import
|
17
|
+
from pydantic import (
|
18
|
+
BaseModel,
|
19
|
+
ConfigDict,
|
20
|
+
Field,
|
21
|
+
JsonValue,
|
22
|
+
field_serializer,
|
23
|
+
)
|
24
|
+
from shortuuid import uuid
|
18
25
|
|
19
26
|
from inspect_ai._util.constants import SAMPLE_SUBTASK
|
20
27
|
from inspect_ai._util.error import EvalError
|
@@ -43,6 +50,13 @@ logger = getLogger(__name__)
|
|
43
50
|
|
44
51
|
|
45
52
|
class BaseEvent(BaseModel):
|
53
|
+
model_config = {
|
54
|
+
"json_schema_extra": lambda schema: schema.get("properties", {}).pop(
|
55
|
+
"id_", None
|
56
|
+
)
|
57
|
+
}
|
58
|
+
id_: str = Field(default_factory=lambda: str(uuid()), exclude=True)
|
59
|
+
|
46
60
|
timestamp: datetime = Field(default_factory=datetime.now)
|
47
61
|
"""Clock time at which event occurred."""
|
48
62
|
|
@@ -451,8 +465,11 @@ ET = TypeVar("ET", bound=BaseEvent)
|
|
451
465
|
class Transcript:
|
452
466
|
"""Transcript of events."""
|
453
467
|
|
468
|
+
_event_logger: Callable[[Event], None] | None
|
469
|
+
|
454
470
|
def __init__(self, name: str = "") -> None:
|
455
471
|
self.name = name
|
472
|
+
self._event_logger = None
|
456
473
|
self._events: list[Event] = []
|
457
474
|
|
458
475
|
def info(self, data: JsonValue, *, source: str | None = None) -> None:
|
@@ -493,8 +510,17 @@ class Transcript:
|
|
493
510
|
return None
|
494
511
|
|
495
512
|
def _event(self, event: Event) -> None:
|
513
|
+
if self._event_logger:
|
514
|
+
self._event_logger(event)
|
496
515
|
self._events.append(event)
|
497
516
|
|
517
|
+
def _event_updated(self, event: Event) -> None:
|
518
|
+
if self._event_logger:
|
519
|
+
self._event_logger(event)
|
520
|
+
|
521
|
+
def _subscribe(self, event_logger: Callable[[Event], None]) -> None:
|
522
|
+
self._event_logger = event_logger
|
523
|
+
|
498
524
|
|
499
525
|
def transcript() -> Transcript:
|
500
526
|
"""Get the current `Transcript`."""
|
inspect_ai/model/_call_tools.py
CHANGED
@@ -92,7 +92,7 @@ class GenerateConfigArgs(TypedDict, total=False):
|
|
92
92
|
"""Whether to cache the prompt prefix. Defaults to "auto", which will enable caching for requests with tools. Anthropic only."""
|
93
93
|
|
94
94
|
reasoning_effort: Literal["low", "medium", "high"] | None
|
95
|
-
"""Constrains effort on reasoning for reasoning models. Open AI o1 models only."""
|
95
|
+
"""Constrains effort on reasoning for reasoning models (defaults to `medium`). Open AI o1 models only."""
|
96
96
|
|
97
97
|
reasoning_tokens: int | None
|
98
98
|
"""Maximum number of tokens to use for reasoning. Anthropic Claude models only."""
|
@@ -171,7 +171,7 @@ class GenerateConfig(BaseModel):
|
|
171
171
|
"""Whether to cache the prompt prefix. Defaults to "auto", which will enable caching for requests with tools. Anthropic only."""
|
172
172
|
|
173
173
|
reasoning_effort: Literal["low", "medium", "high"] | None = Field(default=None)
|
174
|
-
"""Constrains effort on reasoning for reasoning models. Open AI o1 models only."""
|
174
|
+
"""Constrains effort on reasoning for reasoning models (defaults to `medium`). Open AI o1 models only."""
|
175
175
|
|
176
176
|
reasoning_tokens: int | None = Field(default=None)
|
177
177
|
"""Maximum number of tokens to use for reasoning. Anthropic Claude models only."""
|
inspect_ai/model/_model.py
CHANGED
@@ -128,10 +128,10 @@ async def tool_container_sandbox(tool_name: str) -> SandboxEnvironment:
|
|
128
128
|
|
129
129
|
Alternatively, you can include the service into your own Dockerfile:
|
130
130
|
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
131
|
+
ENV PATH="$PATH:/opt/inspect_tool_support/bin"
|
132
|
+
RUN python -m venv /opt/inspect_tool_support && \
|
133
|
+
/opt/inspect_tool_support/bin/pip install inspect-tool-support && \
|
134
|
+
/opt/inspect_tool_support/bin/inspect-tool-support post-install
|
135
135
|
""").strip()
|
136
136
|
raise PrerequisiteError(msg)
|
137
137
|
|
@@ -363,7 +363,9 @@ async def _web_browser_cmd(tool_name: str, params: dict[str, object]) -> ToolRes
|
|
363
363
|
# The user may have the old, incompatible, sandbox. If so, use that and
|
364
364
|
# execute the old compatible code.
|
365
365
|
try:
|
366
|
-
return await old_web_browser_cmd(
|
366
|
+
return await old_web_browser_cmd(
|
367
|
+
tool_name, *(str(value) for value in params.values())
|
368
|
+
)
|
367
369
|
except PrerequisiteError:
|
368
370
|
raise e
|
369
371
|
|
inspect_ai/util/_subtask.py
CHANGED