inspect-ai 0.3.49__py3-none-any.whl → 0.3.51__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/info.py +2 -2
- inspect_ai/_cli/log.py +2 -2
- inspect_ai/_cli/score.py +2 -2
- inspect_ai/_display/core/display.py +19 -0
- inspect_ai/_display/core/panel.py +37 -7
- inspect_ai/_display/core/progress.py +29 -2
- inspect_ai/_display/core/results.py +79 -40
- inspect_ai/_display/core/textual.py +21 -0
- inspect_ai/_display/rich/display.py +28 -8
- inspect_ai/_display/textual/app.py +107 -1
- inspect_ai/_display/textual/display.py +1 -1
- inspect_ai/_display/textual/widgets/samples.py +132 -91
- inspect_ai/_display/textual/widgets/task_detail.py +236 -0
- inspect_ai/_display/textual/widgets/tasks.py +74 -6
- inspect_ai/_display/textual/widgets/toggle.py +32 -0
- inspect_ai/_eval/context.py +2 -0
- inspect_ai/_eval/eval.py +4 -3
- inspect_ai/_eval/loader.py +1 -1
- inspect_ai/_eval/run.py +35 -2
- inspect_ai/_eval/task/log.py +13 -11
- inspect_ai/_eval/task/results.py +12 -3
- inspect_ai/_eval/task/run.py +139 -36
- inspect_ai/_eval/task/sandbox.py +2 -1
- inspect_ai/_util/_async.py +30 -1
- inspect_ai/_util/file.py +31 -4
- inspect_ai/_util/html.py +3 -0
- inspect_ai/_util/logger.py +6 -5
- inspect_ai/_util/platform.py +5 -6
- inspect_ai/_util/registry.py +1 -1
- inspect_ai/_view/server.py +9 -9
- inspect_ai/_view/www/App.css +2 -2
- inspect_ai/_view/www/dist/assets/index.css +2 -2
- inspect_ai/_view/www/dist/assets/index.js +352 -294
- inspect_ai/_view/www/log-schema.json +13 -0
- inspect_ai/_view/www/package.json +1 -0
- inspect_ai/_view/www/src/components/MessageBand.mjs +1 -1
- inspect_ai/_view/www/src/components/Tools.mjs +16 -13
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +1 -3
- inspect_ai/_view/www/src/samples/SampleScoreView.mjs +52 -77
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +38 -13
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +15 -2
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +4 -2
- inspect_ai/_view/www/src/types/log.d.ts +2 -0
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +2 -0
- inspect_ai/_view/www/yarn.lock +9 -4
- inspect_ai/approval/__init__.py +1 -1
- inspect_ai/approval/_human/approver.py +35 -0
- inspect_ai/approval/_human/console.py +62 -0
- inspect_ai/approval/_human/manager.py +108 -0
- inspect_ai/approval/_human/panel.py +233 -0
- inspect_ai/approval/_human/util.py +51 -0
- inspect_ai/dataset/_sources/hf.py +2 -2
- inspect_ai/dataset/_sources/util.py +1 -1
- inspect_ai/log/_file.py +106 -36
- inspect_ai/log/_recorders/eval.py +226 -158
- inspect_ai/log/_recorders/file.py +9 -6
- inspect_ai/log/_recorders/json.py +35 -12
- inspect_ai/log/_recorders/recorder.py +15 -15
- inspect_ai/log/_samples.py +52 -0
- inspect_ai/model/_model.py +14 -0
- inspect_ai/model/_model_output.py +4 -0
- inspect_ai/model/_providers/azureai.py +1 -1
- inspect_ai/model/_providers/hf.py +106 -4
- inspect_ai/model/_providers/util/__init__.py +2 -0
- inspect_ai/model/_providers/util/hf_handler.py +200 -0
- inspect_ai/scorer/_common.py +1 -1
- inspect_ai/solver/_plan.py +0 -8
- inspect_ai/solver/_task_state.py +18 -1
- inspect_ai/solver/_use_tools.py +9 -1
- inspect_ai/tool/_tool_def.py +2 -2
- inspect_ai/tool/_tool_info.py +14 -2
- inspect_ai/tool/_tool_params.py +2 -1
- inspect_ai/tool/_tools/_execute.py +1 -1
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +6 -0
- inspect_ai/util/__init__.py +5 -6
- inspect_ai/util/_panel.py +91 -0
- inspect_ai/util/_sandbox/__init__.py +2 -6
- inspect_ai/util/_sandbox/context.py +4 -3
- inspect_ai/util/_sandbox/docker/compose.py +12 -2
- inspect_ai/util/_sandbox/docker/docker.py +19 -9
- inspect_ai/util/_sandbox/docker/util.py +10 -2
- inspect_ai/util/_sandbox/environment.py +47 -41
- inspect_ai/util/_sandbox/local.py +15 -10
- inspect_ai/util/_subprocess.py +43 -3
- {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/RECORD +90 -82
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +0 -149
- inspect_ai/_view/www/node_modules/flatted/python/test.py +0 -63
- inspect_ai/approval/_human.py +0 -123
- {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/top_level.txt +0 -0
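The detailed hunks below cover the three log recorder modules (`inspect_ai/log/_recorders/eval.py`, `file.py`, and `json.py`). The headline change is that the recorder interface became async in 0.3.51: `log_init`, `log_start`, `log_sample`, `flush`, `log_finish`, `read_log`, `read_log_sample`, and `write_log` are now coroutines. A minimal usage sketch based only on the signatures shown in the diffs (the module path is private API and the log path is hypothetical):

```python
# Sketch: EvalRecorder.read_log is an async classmethod in 0.3.51
# (it was sync in 0.3.49), so direct callers now need an event loop.
import asyncio

from inspect_ai.log._recorders.eval import EvalRecorder


async def main() -> None:
    # header_only=True skips loading the individual samples from the zip
    log = await EvalRecorder.read_log("./logs/example.eval", header_only=True)
    print(log.status)


asyncio.run(main())
```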
inspect_ai/log/_recorders/eval.py

```diff
@@ -1,8 +1,13 @@
+import asyncio
 import json
+import os
 import tempfile
+from contextlib import _AsyncGeneratorContextManager
+from logging import getLogger
 from typing import Any, BinaryIO, Literal, cast
 from zipfile import ZIP_DEFLATED, ZipFile
 
+from fsspec.asyn import AsyncFileSystem  # type: ignore
 from pydantic import BaseModel, Field
 from pydantic_core import to_json
 from typing_extensions import override
@@ -10,7 +15,7 @@ from typing_extensions import override
 from inspect_ai._util.constants import LOG_SCHEMA_VERSION
 from inspect_ai._util.content import ContentImage, ContentText
 from inspect_ai._util.error import EvalError
-from inspect_ai._util.file import dirname, file
+from inspect_ai._util.file import FileSystem, async_fileystem, dirname, file, filesystem
 from inspect_ai._util.json import jsonable_python
 from inspect_ai.model._chat_message import ChatMessage
 from inspect_ai.scorer._metric import Score
@@ -27,15 +32,17 @@ from .._log import (
 )
 from .file import FileRecorder
 
+logger = getLogger(__name__)
+
 
 class SampleSummary(BaseModel):
     id: int | str
     epoch: int
     input: str | list[ChatMessage]
     target: str | list[str]
-    scores: dict[str, Score] | None
-    error: str | None
-    limit: str | None
+    scores: dict[str, Score] | None = Field(default=None)
+    error: str | None = Field(default=None)
+    limit: str | None = Field(default=None)
 
 
 class LogStart(BaseModel):
@@ -82,55 +89,54 @@ class EvalRecorder(FileRecorder):
         self.data: dict[str, ZipLogFile] = {}
 
     @override
-    def log_init(self, eval: EvalSpec, location: str | None = None) -> str:
-        # file
-
+    async def log_init(self, eval: EvalSpec, location: str | None = None) -> str:
+        # if the file exists then read summaries
+        if location is not None and self.fs.exists(location):
+            with file(location, "rb") as f:
+                with ZipFile(f, "r") as zip:
+                    log_start = _read_start(zip)
+                    summary_counter = _read_summary_counter(zip)
+                    summaries = _read_all_summaries(zip, summary_counter)
+        else:
+            log_start = None
+            summary_counter = 0
+            summaries = []
 
         # create zip wrapper
-
-
-
-        summary_counter = _read_summary_counter(zip_log_file.zip)
-        summaries = _read_all_summaries(zip_log_file.zip, summary_counter)
-
-        # Initialize the eval header (without results)
-        log_start = _read_start(zip_log_file.zip)
-
-        # The zip log file
-        zip_log_file.init(log_start, summary_counter, summaries)
+        zip_file = location or self._log_file_path(eval)
+        zip_log_file = ZipLogFile(file=zip_file)
+        await zip_log_file.init(log_start, summary_counter, summaries)
 
         # track zip
         self.data[self._log_file_key(eval)] = zip_log_file
 
         # return file path
-        return
+        return zip_file
 
     @override
-    def log_start(self, eval: EvalSpec, plan: EvalPlan) -> None:
+    async def log_start(self, eval: EvalSpec, plan: EvalPlan) -> None:
+        log = self.data[self._log_file_key(eval)]
         start = LogStart(version=LOG_SCHEMA_VERSION, eval=eval, plan=plan)
-
-
-        log = self.data[self._log_file_key(eval)]  # noqa: F841
-        log.log_start = start
+        await log.start(start)
 
     @override
-    def log_sample(self, eval: EvalSpec, sample: EvalSample) -> None:
-        log = self.data[self._log_file_key(eval)]
-        log.
+    async def log_sample(self, eval: EvalSpec, sample: EvalSample) -> None:
+        log = self.data[self._log_file_key(eval)]
+        await log.buffer_sample(sample)
 
     @override
-    def flush(self, eval: EvalSpec) -> None:
+    async def flush(self, eval: EvalSpec) -> None:
         # get the zip log
         log = self.data[self._log_file_key(eval)]
 
         # write the buffered samples
-
+        await log.write_buffered_samples()
 
         # flush to underlying stream
-        log.flush()
+        await log.flush()
 
     @override
-    def log_finish(
+    async def log_finish(
         self,
         eval: EvalSpec,
         status: Literal["started", "success", "cancelled", "error"],
@@ -144,18 +150,14 @@ class EvalRecorder(FileRecorder):
         log = self.data[key]
 
         # write the buffered samples
-
+        await log.write_buffered_samples()
 
         # write consolidated summaries
-
+        await log.write(SUMMARIES_JSON, log._summaries)
 
         # write reductions
         if reductions is not None:
-
-            eval,
-            REDUCTIONS_JSON,
-            reductions,
-        )
+            await log.write(REDUCTIONS_JSON, reductions)
 
         # Get the results
         log_results = LogResults(
@@ -165,7 +167,7 @@ class EvalRecorder(FileRecorder):
         # add the results to the original eval log from start.json
         log_start = log.log_start
         if log_start is None:
-            raise RuntimeError("
+            raise RuntimeError("Log not properly initialised")
 
         eval_header = EvalLog(
             version=log_start.version,
@@ -176,50 +178,39 @@ class EvalRecorder(FileRecorder):
             status=log_results.status,
             error=log_results.error,
         )
-
-        # write the results
-        self._write(eval, HEADER_JSON, eval_header)
-
-        # close the file
-        log.close()
+        await log.write(HEADER_JSON, eval_header)
 
         # stop tracking this eval
         del self.data[key]
 
-        #
-
+        # flush and write the results
+        await log.flush()
+        return await log.close()
 
     @classmethod
     @override
-    def read_log(cls, location: str, header_only: bool = False) -> EvalLog:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        ".json"
-        ):
-            with zip.open(name, "r") as f:
-                samples.append(EvalSample(**json.load(f)))
-        sort_samples(samples)
-        evalLog.samples = samples
-        return evalLog
+    async def read_log(cls, location: str, header_only: bool = False) -> EvalLog:
+        # if the log is not stored in the local filesystem then download it first,
+        # and then read it from a temp file (eliminates the possiblity of hundreds
+        # of small fetches from the zip file streams)
+        temp_log: str | None = None
+        fs = filesystem(location)
+        if not fs.is_local():
+            with tempfile.NamedTemporaryFile(delete=False) as temp:
+                temp_log = temp.name
+                fs.get_file(location, temp_log)
+
+        # read log (use temp_log if we have it)
+        try:
+            with file(temp_log or location, "rb") as z:
+                return _read_log(z, location, header_only)
+        finally:
+            if temp_log:
+                os.unlink(temp_log)
 
     @override
     @classmethod
-    def read_log_sample(
+    async def read_log_sample(
         cls, location: str, id: str | int, epoch: int = 1
     ) -> EvalSample:
         with file(location, "rb") as z:
@@ -234,67 +225,17 @@ class EvalRecorder(FileRecorder):
 
     @classmethod
     @override
-    def write_log(cls, location: str, log: EvalLog) -> None:
+    async def write_log(cls, location: str, log: EvalLog) -> None:
         # write using the recorder (so we get all of the extra streams)
         recorder = EvalRecorder(dirname(location))
-        recorder.log_init(log.eval, location)
-        recorder.log_start(log.eval, log.plan)
+        await recorder.log_init(log.eval, location)
+        await recorder.log_start(log.eval, log.plan)
         for sample in log.samples or []:
-            recorder.log_sample(log.eval, sample)
-        recorder.log_finish(
+            await recorder.log_sample(log.eval, sample)
+        await recorder.log_finish(
             log.eval, log.status, log.stats, log.results, log.reductions, log.error
         )
 
-    # write to the zip file
-    def _write(self, eval: EvalSpec, filename: str, data: Any) -> None:
-        log = self.data[self._log_file_key(eval)]
-        zip_write(log.zip, filename, data)
-
-    # write buffered samples to the zip file
-    def _write_buffered_samples(self, eval: EvalSpec) -> None:
-        # get the log
-        log = self.data[self._log_file_key(eval)]
-
-        # Write the buffered samples
-        summaries: list[SampleSummary] = []
-        for sample in log.samples:
-            # Write the sample
-            self._write(eval, _sample_filename(sample.id, sample.epoch), sample)
-
-            # Capture the summary
-            summaries.append(
-                SampleSummary(
-                    id=sample.id,
-                    epoch=sample.epoch,
-                    input=text_inputs(sample.input),
-                    target=sample.target,
-                    scores=sample.scores,
-                    error=sample.error.message if sample.error is not None else None,
-                    limit=f"{sample.limit.type}" if sample.limit is not None else None,
-                )
-            )
-        log.samples.clear()
-
-        # write intermediary summaries and add to master list
-        if len(summaries) > 0:
-            log.summary_counter += 1
-            summary_file = _journal_summary_file(log.summary_counter)
-            summary_path = _journal_summary_path(summary_file)
-            self._write(eval, summary_path, summaries)
-            log.summaries.extend(summaries)
-
-
-def zip_write(zip: ZipFile, filename: str, data: Any) -> None:
-    zip.writestr(
-        filename,
-        to_json(
-            value=jsonable_python(data),
-            indent=2,
-            exclude_none=True,
-            fallback=lambda _x: None,
-        ),
-    )
-
 
 
 def text_inputs(inputs: str | list[ChatMessage]) -> str | list[ChatMessage]:
     # Clean the input of any images
@@ -317,52 +258,179 @@ def text_inputs(inputs: str | list[ChatMessage]) -> str | list[ChatMessage]:
 
 
 class ZipLogFile:
-
-
-
-
+    _zip: ZipFile
+    _temp_file: BinaryIO
+    _fs: FileSystem
+    _async_fs_context: _AsyncGeneratorContextManager[AsyncFileSystem] | None = None
+    _async_fs: AsyncFileSystem | None = None
 
     def __init__(self, file: str) -> None:
-        self.
-        self.
-
-
-
-        self.
-        self.
-        self.
-
-
-
-    def init(
+        self._file = file
+        self._fs = filesystem(file)
+        self._lock = asyncio.Lock()
+        self._temp_file = tempfile.TemporaryFile()
+        self._samples: list[EvalSample] = []
+        self._summary_counter = 0
+        self._summaries: list[SampleSummary] = []
+        self._log_start: LogStart | None = None
+
+    async def init(
         self,
         log_start: LogStart | None,
         summary_counter: int,
         summaries: list[SampleSummary],
     ) -> None:
-        self.
-
-
+        async with self._lock:
+            # connect to async filesystem if we can
+            if self._fs.is_async():
+                self._async_fs_context = async_fileystem(self._file)
+                self._async_fs = await self._async_fs_context.__aenter__()
+
+            self._open()
+            self._summary_counter = summary_counter
+            self._summaries = summaries
+            self._log_start = log_start
+
+    @property
+    def log_start(self) -> LogStart | None:
+        return self._log_start
+
+    async def start(self, start: LogStart) -> None:
+        async with self._lock:
+            self._log_start = start
+            self._zip_writestr(_journal_path(START_JSON), start)
+
+    async def buffer_sample(self, sample: EvalSample) -> None:
+        async with self._lock:
+            self._samples.append(sample)
+
+    async def write_buffered_samples(self) -> None:
+        async with self._lock:
+            # Write the buffered samples
+            summaries: list[SampleSummary] = []
+            for sample in self._samples:
+                # Write the sample
+                self._zip_writestr(_sample_filename(sample.id, sample.epoch), sample)
+
+                # Capture the summary
+                summaries.append(
+                    SampleSummary(
+                        id=sample.id,
+                        epoch=sample.epoch,
+                        input=text_inputs(sample.input),
+                        target=sample.target,
+                        scores=sample.scores,
+                        error=sample.error.message
+                        if sample.error is not None
+                        else None,
+                        limit=f"{sample.limit.type}"
+                        if sample.limit is not None
+                        else None,
+                    )
+                )
+            self._samples.clear()
+
+            # write intermediary summaries and add to master list
+            if len(summaries) > 0:
+                self._summary_counter += 1
+                summary_file = _journal_summary_file(self._summary_counter)
+                summary_path = _journal_summary_path(summary_file)
+                self._zip_writestr(summary_path, summaries)
+                self._summaries.extend(summaries)
+
+    async def write(self, filename: str, data: Any) -> None:
+        async with self._lock:
+            self._zip_writestr(filename, data)
+
+    async def flush(self) -> None:
+        async with self._lock:
+            # close the zip file so it is flushed
+            self._zip.close()
+
+            # read the temp_file (leaves pointer at end for subsequent appends)
+            self._temp_file.seek(0)
+            log_bytes = self._temp_file.read()
+
+            # attempt async write
+            written = False
+            try:
+                if self._async_fs:
+                    await self._async_fs._pipe_file(self._file, log_bytes)
+                    written = True
+            except Exception as ex:
+                logger.warning(
+                    f"Error occurred during async write to {self._file}: {ex}. Falling back to sync write."
+                )
 
-
-
-
-
-
-
+            # write sync if we need to
+            if not written:
+                with file(self._file, "wb") as f:
+                    f.write(log_bytes)
+
+            # re-open zip file w/ self.temp_file pointer at end
+            self._open()
+
+    async def close(self) -> EvalLog:
+        async with self._lock:
+            # close the async context if we have one
+            try:
+                if self._async_fs_context:
+                    await self._async_fs_context.__aexit__(None, None, None)
+            except Exception as ex:
+                logger.warning(
+                    f"Error occurred while closing async fs for {self._file}: {ex}"
+                )
 
-
-
-
+            # read the log from the temp file then close it
+            try:
+                self._temp_file.seek(0)
+                return _read_log(self._temp_file, self._file)
+            finally:
+                self._temp_file.close()
 
     def _open(self) -> None:
-        self.
-        self.
+        self._zip = ZipFile(
+            self._temp_file,
             mode="a",
             compression=ZIP_DEFLATED,
             compresslevel=5,
         )
 
+    # raw unsynchronized version of write
+    def _zip_writestr(self, filename: str, data: Any) -> None:
+        self._zip.writestr(
+            filename,
+            to_json(
+                value=jsonable_python(data),
+                indent=2,
+                exclude_none=True,
+                fallback=lambda _x: None,
+            ),
+        )
+
+
+def _read_log(log: BinaryIO, location: str, header_only: bool = False) -> EvalLog:
+    with ZipFile(log, mode="r") as zip:
+        evalLog = _read_header(zip, location)
+        if REDUCTIONS_JSON in zip.namelist():
+            with zip.open(REDUCTIONS_JSON, "r") as f:
+                reductions = [
+                    EvalSampleReductions(**reduction) for reduction in json.load(f)
+                ]
+                if evalLog.results is not None:
+                    evalLog.reductions = reductions
+
+        samples: list[EvalSample] | None = None
+        if not header_only:
+            samples = []
+            for name in zip.namelist():
+                if name.startswith(f"{SAMPLES_DIR}/") and name.endswith(".json"):
+                    with zip.open(name, "r") as f:
+                        samples.append(EvalSample(**json.load(f)))
            sort_samples(samples)
+            evalLog.samples = samples
+        return evalLog
+
 
 def _read_start(zip: ZipFile) -> LogStart | None:
     start_path = _journal_path(START_JSON)
```
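The rewritten `ZipLogFile` above serializes all archive access behind an `asyncio.Lock` and accumulates the zip in a local temp file: `flush` closes the `ZipFile` (forcing the central directory to be written), reads the accumulated bytes, and writes them to the destination in a single operation before re-opening the archive in append mode. A simplified sketch of that buffering pattern, with invented names and only the sync write path (the shipped class also handles async filesystems, sample buffering, and summaries):

```python
import asyncio
import tempfile
from zipfile import ZIP_DEFLATED, ZipFile


class BufferedZipLog:
    def __init__(self, dest: str) -> None:
        self._dest = dest
        self._lock = asyncio.Lock()
        # the archive accumulates here, not at the destination
        self._temp = tempfile.TemporaryFile()
        self._open()

    def _open(self) -> None:
        # append mode lets us re-open the same temp file after a flush
        self._zip = ZipFile(self._temp, mode="a", compression=ZIP_DEFLATED)

    async def write(self, name: str, payload: bytes) -> None:
        async with self._lock:
            self._zip.writestr(name, payload)

    async def flush(self) -> None:
        async with self._lock:
            self._zip.close()  # closing forces the central directory out
            self._temp.seek(0)
            data = self._temp.read()  # pointer is left at end for later appends
            with open(self._dest, "wb") as f:
                f.write(data)  # one whole-buffer write to the destination
            self._open()  # re-open for subsequent writes
```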
inspect_ai/log/_recorders/file.py

```diff
@@ -1,15 +1,16 @@
+from logging import getLogger
 from typing import Any
 
 from typing_extensions import override
 
-from inspect_ai._util.file import (
-    filesystem,
-)
+from inspect_ai._util.file import filesystem
 from inspect_ai._util.registry import registry_unqualified_name
 
 from .._log import EvalLog, EvalSample, EvalSpec
 from .recorder import Recorder
 
+logger = getLogger(__name__)
+
 
 class FileRecorder(Recorder):
     __last_read_sample_log: tuple[str, EvalLog] | None = None
@@ -18,23 +19,25 @@ class FileRecorder(Recorder):
         self, log_dir: str, suffix: str, fs_options: dict[str, Any] = {}
     ) -> None:
         self.log_dir = log_dir.rstrip("/\\")
+        self.suffix = suffix
+
+        # initialise filesystem
         self.fs = filesystem(log_dir, fs_options)
         self.fs.mkdir(self.log_dir, exist_ok=True)
-        self.suffix = suffix
 
     def is_local(self) -> bool:
         return self.fs.is_local()
 
     @override
     @classmethod
-    def read_log_sample(
+    async def read_log_sample(
        cls, location: str, id: str | int, epoch: int = 1
     ) -> EvalSample:
         # establish the log to read from (might be cached)
         if cls.__last_read_sample_log and (cls.__last_read_sample_log[0] == "location"):
             eval_log = cls.__last_read_sample_log[1]
         else:
-            eval_log = cls.read_log(location)
+            eval_log = await cls.read_log(location)
             cls.__last_read_sample_log = (location, eval_log)
 
         # throw if no samples
```
inspect_ai/log/_recorders/json.py

```diff
@@ -1,3 +1,4 @@
+from logging import getLogger
 from typing import Any, Literal, get_args
 
 import ijson  # type: ignore
@@ -10,7 +11,9 @@ from inspect_ai._util.constants import LOG_SCHEMA_VERSION
 from inspect_ai._util.error import EvalError
 from inspect_ai._util.file import (
     absolute_file_path,
+    async_fileystem,
     file,
+    filesystem,
 )
 
 from .._log import (
@@ -25,6 +28,8 @@ from .._log import (
 )
 from .file import FileRecorder
 
+logger = getLogger(__name__)
+
 
 class JSONRecorder(FileRecorder):
     @override
@@ -57,7 +62,7 @@ class JSONRecorder(FileRecorder):
         self.data: dict[str, JSONRecorder.JSONLogFile] = {}
 
     @override
-    def log_init(self, eval: EvalSpec, location: str | None = None) -> str:
+    async def log_init(self, eval: EvalSpec, location: str | None = None) -> str:
         # initialize file log for this eval
         # compute an absolute path if it's a relative ref
         # (so that the writes go to the correct place even
@@ -75,19 +80,19 @@ class JSONRecorder(FileRecorder):
         return file
 
     @override
-    def log_start(self, eval: EvalSpec, plan: EvalPlan) -> None:
+    async def log_start(self, eval: EvalSpec, plan: EvalPlan) -> None:
         log = self.data[self._log_file_key(eval)]
         log.data.plan = plan
 
     @override
-    def log_sample(self, eval: EvalSpec, sample: EvalSample) -> None:
+    async def log_sample(self, eval: EvalSpec, sample: EvalSample) -> None:
         log = self.data[self._log_file_key(eval)]
         if log.data.samples is None:
             log.data.samples = []
         log.data.samples.append(sample)
 
     @override
-    def log_finish(
+    async def log_finish(
         self,
         spec: EvalSpec,
         status: Literal["started", "success", "cancelled", "error"],
@@ -104,7 +109,7 @@ class JSONRecorder(FileRecorder):
         log.data.error = error
         if reductions:
             log.data.reductions = reductions
-        self.write_log(log.file, log.data)
+        await self.write_log(log.file, log.data)
         log.data.location = log.file
 
         # stop tracking this data
@@ -114,13 +119,13 @@ class JSONRecorder(FileRecorder):
         return log.data
 
     @override
-    def flush(self, eval: EvalSpec) -> None:
+    async def flush(self, eval: EvalSpec) -> None:
         log = self.data[self._log_file_key(eval)]
-        self.write_log(log.file, log.data)
+        await self.write_log(log.file, log.data)
 
     @override
     @classmethod
-    def read_log(cls, location: str, header_only: bool = False) -> EvalLog:
+    async def read_log(cls, location: str, header_only: bool = False) -> EvalLog:
         if header_only:
             try:
                 return _read_header_streaming(location)
@@ -138,7 +143,7 @@ class JSONRecorder(FileRecorder):
         else:
             raise ValueError(f"Unable to read log file: {location}") from ex
 
-        #
+        # full reads (and fallback to streaing reads if they encounter invalid json characters)
         with file(location, "r") as f:
             # parse w/ pydantic
             raw_data = from_json(f.read())
@@ -166,15 +171,33 @@ class JSONRecorder(FileRecorder):
 
     @override
     @classmethod
-    def write_log(cls, location: str, log: EvalLog) -> None:
+    async def write_log(cls, location: str, log: EvalLog) -> None:
         from inspect_ai.log._file import eval_log_json
 
         # sort samples before writing as they can come in out of order
         if log.samples:
             sort_samples(log.samples)
 
-
-
+        # get log as bytes
+        log_bytes = eval_log_json(log)
+
+        # try to write async for async filesystems
+        written = False
+        try:
+            fs = filesystem(location)
+            if fs.is_async():
+                async with async_fileystem(location) as async_fs:
+                    await async_fs._pipe_file(location, log_bytes)
+                    written = True
+        except Exception as ex:
+            logger.warning(
+                f"Error occurred during async write to {location}: {ex}. Falling back to sync write."
+            )
+
+        # otherwise use sync
+        if not written:
+            with file(location, "wb") as f:
+                f.write(log_bytes)
 
 
 def _validate_version(ver: int) -> None:
```