inspect-ai 0.3.49__py3-none-any.whl → 0.3.51__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. inspect_ai/_cli/info.py +2 -2
  2. inspect_ai/_cli/log.py +2 -2
  3. inspect_ai/_cli/score.py +2 -2
  4. inspect_ai/_display/core/display.py +19 -0
  5. inspect_ai/_display/core/panel.py +37 -7
  6. inspect_ai/_display/core/progress.py +29 -2
  7. inspect_ai/_display/core/results.py +79 -40
  8. inspect_ai/_display/core/textual.py +21 -0
  9. inspect_ai/_display/rich/display.py +28 -8
  10. inspect_ai/_display/textual/app.py +107 -1
  11. inspect_ai/_display/textual/display.py +1 -1
  12. inspect_ai/_display/textual/widgets/samples.py +132 -91
  13. inspect_ai/_display/textual/widgets/task_detail.py +236 -0
  14. inspect_ai/_display/textual/widgets/tasks.py +74 -6
  15. inspect_ai/_display/textual/widgets/toggle.py +32 -0
  16. inspect_ai/_eval/context.py +2 -0
  17. inspect_ai/_eval/eval.py +4 -3
  18. inspect_ai/_eval/loader.py +1 -1
  19. inspect_ai/_eval/run.py +35 -2
  20. inspect_ai/_eval/task/log.py +13 -11
  21. inspect_ai/_eval/task/results.py +12 -3
  22. inspect_ai/_eval/task/run.py +139 -36
  23. inspect_ai/_eval/task/sandbox.py +2 -1
  24. inspect_ai/_util/_async.py +30 -1
  25. inspect_ai/_util/file.py +31 -4
  26. inspect_ai/_util/html.py +3 -0
  27. inspect_ai/_util/logger.py +6 -5
  28. inspect_ai/_util/platform.py +5 -6
  29. inspect_ai/_util/registry.py +1 -1
  30. inspect_ai/_view/server.py +9 -9
  31. inspect_ai/_view/www/App.css +2 -2
  32. inspect_ai/_view/www/dist/assets/index.css +2 -2
  33. inspect_ai/_view/www/dist/assets/index.js +352 -294
  34. inspect_ai/_view/www/log-schema.json +13 -0
  35. inspect_ai/_view/www/package.json +1 -0
  36. inspect_ai/_view/www/src/components/MessageBand.mjs +1 -1
  37. inspect_ai/_view/www/src/components/Tools.mjs +16 -13
  38. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +1 -3
  39. inspect_ai/_view/www/src/samples/SampleScoreView.mjs +52 -77
  40. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +38 -13
  41. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +15 -2
  42. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +4 -2
  43. inspect_ai/_view/www/src/types/log.d.ts +2 -0
  44. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +2 -0
  45. inspect_ai/_view/www/yarn.lock +9 -4
  46. inspect_ai/approval/__init__.py +1 -1
  47. inspect_ai/approval/_human/approver.py +35 -0
  48. inspect_ai/approval/_human/console.py +62 -0
  49. inspect_ai/approval/_human/manager.py +108 -0
  50. inspect_ai/approval/_human/panel.py +233 -0
  51. inspect_ai/approval/_human/util.py +51 -0
  52. inspect_ai/dataset/_sources/hf.py +2 -2
  53. inspect_ai/dataset/_sources/util.py +1 -1
  54. inspect_ai/log/_file.py +106 -36
  55. inspect_ai/log/_recorders/eval.py +226 -158
  56. inspect_ai/log/_recorders/file.py +9 -6
  57. inspect_ai/log/_recorders/json.py +35 -12
  58. inspect_ai/log/_recorders/recorder.py +15 -15
  59. inspect_ai/log/_samples.py +52 -0
  60. inspect_ai/model/_model.py +14 -0
  61. inspect_ai/model/_model_output.py +4 -0
  62. inspect_ai/model/_providers/azureai.py +1 -1
  63. inspect_ai/model/_providers/hf.py +106 -4
  64. inspect_ai/model/_providers/util/__init__.py +2 -0
  65. inspect_ai/model/_providers/util/hf_handler.py +200 -0
  66. inspect_ai/scorer/_common.py +1 -1
  67. inspect_ai/solver/_plan.py +0 -8
  68. inspect_ai/solver/_task_state.py +18 -1
  69. inspect_ai/solver/_use_tools.py +9 -1
  70. inspect_ai/tool/_tool_def.py +2 -2
  71. inspect_ai/tool/_tool_info.py +14 -2
  72. inspect_ai/tool/_tool_params.py +2 -1
  73. inspect_ai/tool/_tools/_execute.py +1 -1
  74. inspect_ai/tool/_tools/_web_browser/_web_browser.py +6 -0
  75. inspect_ai/util/__init__.py +5 -6
  76. inspect_ai/util/_panel.py +91 -0
  77. inspect_ai/util/_sandbox/__init__.py +2 -6
  78. inspect_ai/util/_sandbox/context.py +4 -3
  79. inspect_ai/util/_sandbox/docker/compose.py +12 -2
  80. inspect_ai/util/_sandbox/docker/docker.py +19 -9
  81. inspect_ai/util/_sandbox/docker/util.py +10 -2
  82. inspect_ai/util/_sandbox/environment.py +47 -41
  83. inspect_ai/util/_sandbox/local.py +15 -10
  84. inspect_ai/util/_subprocess.py +43 -3
  85. {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/METADATA +2 -2
  86. {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/RECORD +90 -82
  87. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +0 -149
  88. inspect_ai/_view/www/node_modules/flatted/python/test.py +0 -63
  89. inspect_ai/approval/_human.py +0 -123
  90. {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/LICENSE +0 -0
  91. {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/WHEEL +0 -0
  92. {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/entry_points.txt +0 -0
  93. {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/top_level.txt +0 -0
inspect_ai/log/_recorders/eval.py

@@ -1,8 +1,13 @@
+ import asyncio
  import json
+ import os
  import tempfile
+ from contextlib import _AsyncGeneratorContextManager
+ from logging import getLogger
  from typing import Any, BinaryIO, Literal, cast
  from zipfile import ZIP_DEFLATED, ZipFile

+ from fsspec.asyn import AsyncFileSystem  # type: ignore
  from pydantic import BaseModel, Field
  from pydantic_core import to_json
  from typing_extensions import override
@@ -10,7 +15,7 @@ from typing_extensions import override
  from inspect_ai._util.constants import LOG_SCHEMA_VERSION
  from inspect_ai._util.content import ContentImage, ContentText
  from inspect_ai._util.error import EvalError
- from inspect_ai._util.file import dirname, file
+ from inspect_ai._util.file import FileSystem, async_fileystem, dirname, file, filesystem
  from inspect_ai._util.json import jsonable_python
  from inspect_ai.model._chat_message import ChatMessage
  from inspect_ai.scorer._metric import Score
@@ -27,15 +32,17 @@ from .._log import (
  )
  from .file import FileRecorder

+ logger = getLogger(__name__)
+

  class SampleSummary(BaseModel):
      id: int | str
      epoch: int
      input: str | list[ChatMessage]
      target: str | list[str]
-     scores: dict[str, Score] | None
-     error: str | None
-     limit: str | None
+     scores: dict[str, Score] | None = Field(default=None)
+     error: str | None = Field(default=None)
+     limit: str | None = Field(default=None)


  class LogStart(BaseModel):
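
A note on the SampleSummary change above: giving scores/error/limit explicit None defaults means summary JSON written by earlier versions (which may omit those keys) still validates. A minimal sketch of that behavior, assuming pydantic v2:

```python
# Minimal sketch (pydantic v2 assumed): fields with explicit defaults
# validate even when the serialized record omits them.
from pydantic import BaseModel, Field


class Summary(BaseModel):
    id: int | str
    epoch: int
    scores: dict | None = Field(default=None)  # optional, defaults to None


# an older record without a "scores" key still parses
summary = Summary.model_validate({"id": 1, "epoch": 1})
assert summary.scores is None
```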
@@ -82,55 +89,54 @@ class EvalRecorder(FileRecorder):
          self.data: dict[str, ZipLogFile] = {}

      @override
-     def log_init(self, eval: EvalSpec, location: str | None = None) -> str:
-         # file to write to
-         file = location or self._log_file_path(eval)
+     async def log_init(self, eval: EvalSpec, location: str | None = None) -> str:
+         # if the file exists then read summaries
+         if location is not None and self.fs.exists(location):
+             with file(location, "rb") as f:
+                 with ZipFile(f, "r") as zip:
+                     log_start = _read_start(zip)
+                     summary_counter = _read_summary_counter(zip)
+                     summaries = _read_all_summaries(zip, summary_counter)
+         else:
+             log_start = None
+             summary_counter = 0
+             summaries = []

          # create zip wrapper
-         zip_log_file = ZipLogFile(file=file)
-
-         # Initialize the summary counter and existing summaries
-         summary_counter = _read_summary_counter(zip_log_file.zip)
-         summaries = _read_all_summaries(zip_log_file.zip, summary_counter)
-
-         # Initialize the eval header (without results)
-         log_start = _read_start(zip_log_file.zip)
-
-         # The zip log file
-         zip_log_file.init(log_start, summary_counter, summaries)
+         zip_file = location or self._log_file_path(eval)
+         zip_log_file = ZipLogFile(file=zip_file)
+         await zip_log_file.init(log_start, summary_counter, summaries)

          # track zip
          self.data[self._log_file_key(eval)] = zip_log_file

          # return file path
-         return file
+         return zip_file

      @override
-     def log_start(self, eval: EvalSpec, plan: EvalPlan) -> None:
+     async def log_start(self, eval: EvalSpec, plan: EvalPlan) -> None:
+         log = self.data[self._log_file_key(eval)]
          start = LogStart(version=LOG_SCHEMA_VERSION, eval=eval, plan=plan)
-         self._write(eval, _journal_path(START_JSON), start)
-
-         log = self.data[self._log_file_key(eval)]  # noqa: F841
-         log.log_start = start
+         await log.start(start)

      @override
-     def log_sample(self, eval: EvalSpec, sample: EvalSample) -> None:
-         log = self.data[self._log_file_key(eval)]  # noqa: F841
-         log.samples.append(sample)
+     async def log_sample(self, eval: EvalSpec, sample: EvalSample) -> None:
+         log = self.data[self._log_file_key(eval)]
+         await log.buffer_sample(sample)

      @override
-     def flush(self, eval: EvalSpec) -> None:
+     async def flush(self, eval: EvalSpec) -> None:
          # get the zip log
          log = self.data[self._log_file_key(eval)]

          # write the buffered samples
-         self._write_buffered_samples(eval)
+         await log.write_buffered_samples()

          # flush to underlying stream
-         log.flush()
+         await log.flush()

      @override
-     def log_finish(
+     async def log_finish(
          self,
          eval: EvalSpec,
          status: Literal["started", "success", "cancelled", "error"],
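
With this hunk the recorder interface becomes coroutine-based, so callers now have to await each step from an event loop. A minimal sketch of driving it (the objects passed in are placeholders; the log_finish argument order mirrors the write_log call further down in this diff):

```python
# Sketch only: drives the now-async recorder interface end to end.
import asyncio


async def drive(recorder, spec, plan, samples, status, stats, results) -> None:
    await recorder.log_init(spec)                # open (or restore) the log
    await recorder.log_start(spec, plan)         # journal the eval header
    for sample in samples:
        await recorder.log_sample(spec, sample)  # buffer the sample
    await recorder.flush(spec)                   # persist buffered samples
    await recorder.log_finish(spec, status, stats, results, None, None)


# asyncio.run(drive(recorder, spec, plan, samples, "success", stats, results))
```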
@@ -144,18 +150,14 @@ class EvalRecorder(FileRecorder):
          log = self.data[key]

          # write the buffered samples
-         self._write_buffered_samples(eval)
+         await log.write_buffered_samples()

          # write consolidated summaries
-         self._write(eval, SUMMARIES_JSON, log.summaries)
+         await log.write(SUMMARIES_JSON, log._summaries)

          # write reductions
          if reductions is not None:
-             self._write(
-                 eval,
-                 REDUCTIONS_JSON,
-                 reductions,
-             )
+             await log.write(REDUCTIONS_JSON, reductions)

          # Get the results
          log_results = LogResults(
@@ -165,7 +167,7 @@ class EvalRecorder(FileRecorder):
          # add the results to the original eval log from start.json
          log_start = log.log_start
          if log_start is None:
-             raise RuntimeError("Unexpectedly issing the log start value")
+             raise RuntimeError("Log not properly initialised")

          eval_header = EvalLog(
              version=log_start.version,
@@ -176,50 +178,39 @@ class EvalRecorder(FileRecorder):
              status=log_results.status,
              error=log_results.error,
          )
-
-         # write the results
-         self._write(eval, HEADER_JSON, eval_header)
-
-         # close the file
-         log.close()
+         await log.write(HEADER_JSON, eval_header)

          # stop tracking this eval
          del self.data[key]

-         # return the full EvalLog
-         return self.read_log(log.file)
+         # flush and write the results
+         await log.flush()
+         return await log.close()

      @classmethod
      @override
-     def read_log(cls, location: str, header_only: bool = False) -> EvalLog:
-         with file(location, "rb") as z:
-             with ZipFile(z, mode="r") as zip:
-                 evalLog = _read_header(zip, location)
-                 if REDUCTIONS_JSON in zip.namelist():
-                     with zip.open(REDUCTIONS_JSON, "r") as f:
-                         reductions = [
-                             EvalSampleReductions(**reduction)
-                             for reduction in json.load(f)
-                         ]
-                         if evalLog.results is not None:
-                             evalLog.reductions = reductions
-
-                 samples: list[EvalSample] | None = None
-                 if not header_only:
-                     samples = []
-                     for name in zip.namelist():
-                         if name.startswith(f"{SAMPLES_DIR}/") and name.endswith(
-                             ".json"
-                         ):
-                             with zip.open(name, "r") as f:
-                                 samples.append(EvalSample(**json.load(f)))
-                     sort_samples(samples)
-                     evalLog.samples = samples
-                 return evalLog
+     async def read_log(cls, location: str, header_only: bool = False) -> EvalLog:
+         # if the log is not stored in the local filesystem then download it first,
+         # and then read it from a temp file (eliminates the possibility of hundreds
+         # of small fetches from the zip file streams)
+         temp_log: str | None = None
+         fs = filesystem(location)
+         if not fs.is_local():
+             with tempfile.NamedTemporaryFile(delete=False) as temp:
+                 temp_log = temp.name
+                 fs.get_file(location, temp_log)
+
+         # read log (use temp_log if we have it)
+         try:
+             with file(temp_log or location, "rb") as z:
+                 return _read_log(z, location, header_only)
+         finally:
+             if temp_log:
+                 os.unlink(temp_log)

      @override
      @classmethod
-     def read_log_sample(
+     async def read_log_sample(
          cls, location: str, id: str | int, epoch: int = 1
      ) -> EvalSample:
          with file(location, "rb") as z:
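
The new read_log above downloads remote logs in one bulk fetch before opening the zip, rather than issuing many small ranged reads against the archive. A rough sketch of that pattern using plain fsspec (inspect_ai goes through its own FileSystem wrapper; the helper below is illustrative, not the library's API):

```python
# Sketch of the download-then-read pattern with plain fsspec (illustrative).
import os
import tempfile
from zipfile import ZipFile

import fsspec


def list_log_entries(location: str) -> list[str]:
    protocol = location.split("://", 1)[0] if "://" in location else "file"
    fs = fsspec.filesystem(protocol)
    temp_path: str | None = None
    if protocol != "file":
        # one bulk download beats hundreds of small ranged reads on the zip
        with tempfile.NamedTemporaryFile(delete=False) as temp:
            temp_path = temp.name
        fs.get_file(location, temp_path)
    try:
        with open(temp_path or location, "rb") as f:
            with ZipFile(f, "r") as zip:
                return zip.namelist()
    finally:
        if temp_path:
            os.unlink(temp_path)
```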
@@ -234,67 +225,17 @@ class EvalRecorder(FileRecorder):

      @classmethod
      @override
-     def write_log(cls, location: str, log: EvalLog) -> None:
+     async def write_log(cls, location: str, log: EvalLog) -> None:
          # write using the recorder (so we get all of the extra streams)
          recorder = EvalRecorder(dirname(location))
-         recorder.log_init(log.eval, location)
-         recorder.log_start(log.eval, log.plan)
+         await recorder.log_init(log.eval, location)
+         await recorder.log_start(log.eval, log.plan)
          for sample in log.samples or []:
-             recorder.log_sample(log.eval, sample)
-         recorder.log_finish(
+             await recorder.log_sample(log.eval, sample)
+         await recorder.log_finish(
              log.eval, log.status, log.stats, log.results, log.reductions, log.error
          )

-     # write to the zip file
-     def _write(self, eval: EvalSpec, filename: str, data: Any) -> None:
-         log = self.data[self._log_file_key(eval)]
-         zip_write(log.zip, filename, data)
-
-     # write buffered samples to the zip file
-     def _write_buffered_samples(self, eval: EvalSpec) -> None:
-         # get the log
-         log = self.data[self._log_file_key(eval)]
-
-         # Write the buffered samples
-         summaries: list[SampleSummary] = []
-         for sample in log.samples:
-             # Write the sample
-             self._write(eval, _sample_filename(sample.id, sample.epoch), sample)
-
-             # Capture the summary
-             summaries.append(
-                 SampleSummary(
-                     id=sample.id,
-                     epoch=sample.epoch,
-                     input=text_inputs(sample.input),
-                     target=sample.target,
-                     scores=sample.scores,
-                     error=sample.error.message if sample.error is not None else None,
-                     limit=f"{sample.limit.type}" if sample.limit is not None else None,
-                 )
-             )
-         log.samples.clear()
-
-         # write intermediary summaries and add to master list
-         if len(summaries) > 0:
-             log.summary_counter += 1
-             summary_file = _journal_summary_file(log.summary_counter)
-             summary_path = _journal_summary_path(summary_file)
-             self._write(eval, summary_path, summaries)
-             log.summaries.extend(summaries)
-
-
- def zip_write(zip: ZipFile, filename: str, data: Any) -> None:
-     zip.writestr(
-         filename,
-         to_json(
-             value=jsonable_python(data),
-             indent=2,
-             exclude_none=True,
-             fallback=lambda _x: None,
-         ),
-     )
-

  def text_inputs(inputs: str | list[ChatMessage]) -> str | list[ChatMessage]:
      # Clean the input of any images
@@ -317,52 +258,179 @@ def text_inputs(inputs: str | list[ChatMessage]) -> str | list[ChatMessage]:


  class ZipLogFile:
-     TEMP_LOG_FILE_MAX = 20 * 1024 * 1024
-
-     zip: ZipFile
-     temp_file: BinaryIO
+     _zip: ZipFile
+     _temp_file: BinaryIO
+     _fs: FileSystem
+     _async_fs_context: _AsyncGeneratorContextManager[AsyncFileSystem] | None = None
+     _async_fs: AsyncFileSystem | None = None

      def __init__(self, file: str) -> None:
-         self.file = file
-         self.temp_file = cast(
-             BinaryIO,
-             tempfile.SpooledTemporaryFile(self.TEMP_LOG_FILE_MAX),
-         )
-         self._open()
-         self.samples: list[EvalSample] = []
-         self.summary_counter = 0
-         self.summaries: list[SampleSummary] = []
-         self.log_start: LogStart | None = None
-
-     def init(
+         self._file = file
+         self._fs = filesystem(file)
+         self._lock = asyncio.Lock()
+         self._temp_file = tempfile.TemporaryFile()
+         self._samples: list[EvalSample] = []
+         self._summary_counter = 0
+         self._summaries: list[SampleSummary] = []
+         self._log_start: LogStart | None = None
+
+     async def init(
          self,
          log_start: LogStart | None,
          summary_counter: int,
          summaries: list[SampleSummary],
      ) -> None:
-         self.summary_counter = summary_counter
-         self.summaries = summaries
-         self.log_start = log_start
+         async with self._lock:
+             # connect to async filesystem if we can
+             if self._fs.is_async():
+                 self._async_fs_context = async_fileystem(self._file)
+                 self._async_fs = await self._async_fs_context.__aenter__()
+
+             self._open()
+             self._summary_counter = summary_counter
+             self._summaries = summaries
+             self._log_start = log_start
+
+     @property
+     def log_start(self) -> LogStart | None:
+         return self._log_start
+
+     async def start(self, start: LogStart) -> None:
+         async with self._lock:
+             self._log_start = start
+             self._zip_writestr(_journal_path(START_JSON), start)
+
+     async def buffer_sample(self, sample: EvalSample) -> None:
+         async with self._lock:
+             self._samples.append(sample)
+
+     async def write_buffered_samples(self) -> None:
+         async with self._lock:
+             # Write the buffered samples
+             summaries: list[SampleSummary] = []
+             for sample in self._samples:
+                 # Write the sample
+                 self._zip_writestr(_sample_filename(sample.id, sample.epoch), sample)
+
+                 # Capture the summary
+                 summaries.append(
+                     SampleSummary(
+                         id=sample.id,
+                         epoch=sample.epoch,
+                         input=text_inputs(sample.input),
+                         target=sample.target,
+                         scores=sample.scores,
+                         error=sample.error.message
+                         if sample.error is not None
+                         else None,
+                         limit=f"{sample.limit.type}"
+                         if sample.limit is not None
+                         else None,
+                     )
+                 )
+             self._samples.clear()
+
+             # write intermediary summaries and add to master list
+             if len(summaries) > 0:
+                 self._summary_counter += 1
+                 summary_file = _journal_summary_file(self._summary_counter)
+                 summary_path = _journal_summary_path(summary_file)
+                 self._zip_writestr(summary_path, summaries)
+                 self._summaries.extend(summaries)
+
+     async def write(self, filename: str, data: Any) -> None:
+         async with self._lock:
+             self._zip_writestr(filename, data)
+
+     async def flush(self) -> None:
+         async with self._lock:
+             # close the zip file so it is flushed
+             self._zip.close()
+
+             # read the temp_file (leaves pointer at end for subsequent appends)
+             self._temp_file.seek(0)
+             log_bytes = self._temp_file.read()
+
+             # attempt async write
+             written = False
+             try:
+                 if self._async_fs:
+                     await self._async_fs._pipe_file(self._file, log_bytes)
+                     written = True
+             except Exception as ex:
+                 logger.warning(
+                     f"Error occurred during async write to {self._file}: {ex}. Falling back to sync write."
+                 )

-     def flush(self) -> None:
-         self.zip.close()
-         self.temp_file.seek(0)
-         with file(self.file, "wb") as f:
-             f.write(self.temp_file.read())
-         self._open()
+             # write sync if we need to
+             if not written:
+                 with file(self._file, "wb") as f:
+                     f.write(log_bytes)
+
+             # re-open zip file w/ self.temp_file pointer at end
+             self._open()
+
+     async def close(self) -> EvalLog:
+         async with self._lock:
+             # close the async context if we have one
+             try:
+                 if self._async_fs_context:
+                     await self._async_fs_context.__aexit__(None, None, None)
+             except Exception as ex:
+                 logger.warning(
+                     f"Error occurred while closing async fs for {self._file}: {ex}"
+                 )

-     def close(self) -> None:
-         self.flush()
-         self.temp_file.close()
+             # read the log from the temp file then close it
+             try:
+                 self._temp_file.seek(0)
+                 return _read_log(self._temp_file, self._file)
+             finally:
+                 self._temp_file.close()

      def _open(self) -> None:
-         self.zip = ZipFile(
-             self.temp_file,
+         self._zip = ZipFile(
+             self._temp_file,
              mode="a",
              compression=ZIP_DEFLATED,
              compresslevel=5,
          )

+     # raw unsynchronized version of write
+     def _zip_writestr(self, filename: str, data: Any) -> None:
+         self._zip.writestr(
+             filename,
+             to_json(
+                 value=jsonable_python(data),
+                 indent=2,
+                 exclude_none=True,
+                 fallback=lambda _x: None,
+             ),
+         )
+
+
+ def _read_log(log: BinaryIO, location: str, header_only: bool = False) -> EvalLog:
+     with ZipFile(log, mode="r") as zip:
+         evalLog = _read_header(zip, location)
+         if REDUCTIONS_JSON in zip.namelist():
+             with zip.open(REDUCTIONS_JSON, "r") as f:
+                 reductions = [
+                     EvalSampleReductions(**reduction) for reduction in json.load(f)
+                 ]
+                 if evalLog.results is not None:
+                     evalLog.reductions = reductions
+
+         samples: list[EvalSample] | None = None
+         if not header_only:
+             samples = []
+             for name in zip.namelist():
+                 if name.startswith(f"{SAMPLES_DIR}/") and name.endswith(".json"):
+                     with zip.open(name, "r") as f:
+                         samples.append(EvalSample(**json.load(f)))
+             sort_samples(samples)
+             evalLog.samples = samples
+         return evalLog
+


  def _read_start(zip: ZipFile) -> LogStart | None:
      start_path = _journal_path(START_JSON)
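
The ZipLogFile.flush above (and JSONRecorder.write_log below) share one pattern: serialize in memory, attempt a single async object write, and fall back to a synchronous write on any failure. A condensed sketch of that pattern, assuming an fsspec AsyncFileSystem (whose internal _pipe_file writes a whole object in one call):

```python
# Condensed sketch of the async-write-with-sync-fallback pattern,
# assuming an fsspec AsyncFileSystem (or None for purely local writes).
import logging

logger = logging.getLogger(__name__)


async def write_bytes(async_fs, path: str, data: bytes) -> None:
    written = False
    try:
        if async_fs is not None:
            await async_fs._pipe_file(path, data)  # single async object write
            written = True
    except Exception as ex:
        logger.warning(f"Async write to {path} failed ({ex}); using sync write.")
    if not written:
        with open(path, "wb") as f:  # sync fallback (local path assumed)
            f.write(data)
```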
inspect_ai/log/_recorders/file.py

@@ -1,15 +1,16 @@
+ from logging import getLogger
  from typing import Any

  from typing_extensions import override

- from inspect_ai._util.file import (
-     filesystem,
- )
+ from inspect_ai._util.file import filesystem
  from inspect_ai._util.registry import registry_unqualified_name

  from .._log import EvalLog, EvalSample, EvalSpec
  from .recorder import Recorder

+ logger = getLogger(__name__)
+

  class FileRecorder(Recorder):
      __last_read_sample_log: tuple[str, EvalLog] | None = None
@@ -18,23 +19,25 @@ class FileRecorder(Recorder):
          self, log_dir: str, suffix: str, fs_options: dict[str, Any] = {}
      ) -> None:
          self.log_dir = log_dir.rstrip("/\\")
+         self.suffix = suffix
+
+         # initialise filesystem
          self.fs = filesystem(log_dir, fs_options)
          self.fs.mkdir(self.log_dir, exist_ok=True)
-         self.suffix = suffix

      def is_local(self) -> bool:
          return self.fs.is_local()

      @override
      @classmethod
-     def read_log_sample(
+     async def read_log_sample(
          cls, location: str, id: str | int, epoch: int = 1
      ) -> EvalSample:
          # establish the log to read from (might be cached)
          if cls.__last_read_sample_log and (cls.__last_read_sample_log[0] == "location"):
              eval_log = cls.__last_read_sample_log[1]
          else:
-             eval_log = cls.read_log(location)
+             eval_log = await cls.read_log(location)
              cls.__last_read_sample_log = (location, eval_log)

          # throw if no samples
inspect_ai/log/_recorders/json.py

@@ -1,3 +1,4 @@
+ from logging import getLogger
  from typing import Any, Literal, get_args

  import ijson  # type: ignore
@@ -10,7 +11,9 @@ from inspect_ai._util.constants import LOG_SCHEMA_VERSION
  from inspect_ai._util.error import EvalError
  from inspect_ai._util.file import (
      absolute_file_path,
+     async_fileystem,
      file,
+     filesystem,
  )

  from .._log import (
@@ -25,6 +28,8 @@ from .._log import (
  )
  from .file import FileRecorder

+ logger = getLogger(__name__)
+

  class JSONRecorder(FileRecorder):
      @override
@@ -57,7 +62,7 @@ class JSONRecorder(FileRecorder):
          self.data: dict[str, JSONRecorder.JSONLogFile] = {}

      @override
-     def log_init(self, eval: EvalSpec, location: str | None = None) -> str:
+     async def log_init(self, eval: EvalSpec, location: str | None = None) -> str:
          # initialize file log for this eval
          # compute an absolute path if it's a relative ref
          # (so that the writes go to the correct place even
@@ -75,19 +80,19 @@ class JSONRecorder(FileRecorder):
          return file

      @override
-     def log_start(self, eval: EvalSpec, plan: EvalPlan) -> None:
+     async def log_start(self, eval: EvalSpec, plan: EvalPlan) -> None:
          log = self.data[self._log_file_key(eval)]
          log.data.plan = plan

      @override
-     def log_sample(self, eval: EvalSpec, sample: EvalSample) -> None:
+     async def log_sample(self, eval: EvalSpec, sample: EvalSample) -> None:
          log = self.data[self._log_file_key(eval)]
          if log.data.samples is None:
              log.data.samples = []
          log.data.samples.append(sample)

      @override
-     def log_finish(
+     async def log_finish(
          self,
          spec: EvalSpec,
          status: Literal["started", "success", "cancelled", "error"],
@@ -104,7 +109,7 @@ class JSONRecorder(FileRecorder):
          log.data.error = error
          if reductions:
              log.data.reductions = reductions
-         self.write_log(log.file, log.data)
+         await self.write_log(log.file, log.data)
          log.data.location = log.file

          # stop tracking this data
@@ -114,13 +119,13 @@ class JSONRecorder(FileRecorder):
          return log.data

      @override
-     def flush(self, eval: EvalSpec) -> None:
+     async def flush(self, eval: EvalSpec) -> None:
          log = self.data[self._log_file_key(eval)]
-         self.write_log(log.file, log.data)
+         await self.write_log(log.file, log.data)

      @override
      @classmethod
-     def read_log(cls, location: str, header_only: bool = False) -> EvalLog:
+     async def read_log(cls, location: str, header_only: bool = False) -> EvalLog:
          if header_only:
              try:
                  return _read_header_streaming(location)
@@ -138,7 +143,7 @@ class JSONRecorder(FileRecorder):
              else:
                  raise ValueError(f"Unable to read log file: {location}") from ex

-         # parse full log (also used as a fallback for header_only encountering NaN or Inf)
+         # full reads (and fallback to streaming reads if they encounter invalid json characters)
          with file(location, "r") as f:
              # parse w/ pydantic
              raw_data = from_json(f.read())
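
For context, the header-only path above goes through _read_header_streaming, which uses ijson to pull top-level fields without loading the whole file. An illustrative sketch of that technique (the shape of the idea, not the package's implementation):

```python
# Illustrative sketch of header-only streaming with ijson: pull a single
# top-level field without parsing the entire (possibly very large) log.
import ijson


def read_log_status(location: str) -> str | None:
    with open(location, "rb") as f:
        for status in ijson.items(f, "status"):
            return status  # first (only) match for the top-level key
    return None
```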
@@ -166,15 +171,33 @@ class JSONRecorder(FileRecorder):

      @override
      @classmethod
-     def write_log(cls, location: str, log: EvalLog) -> None:
+     async def write_log(cls, location: str, log: EvalLog) -> None:
          from inspect_ai.log._file import eval_log_json

          # sort samples before writing as they can come in out of order
          if log.samples:
              sort_samples(log.samples)

-         with file(location, "w") as f:
-             f.write(eval_log_json(log))
+         # get log as bytes
+         log_bytes = eval_log_json(log)
+
+         # try to write async for async filesystems
+         written = False
+         try:
+             fs = filesystem(location)
+             if fs.is_async():
+                 async with async_fileystem(location) as async_fs:
+                     await async_fs._pipe_file(location, log_bytes)
+                     written = True
+         except Exception as ex:
+             logger.warning(
+                 f"Error occurred during async write to {location}: {ex}. Falling back to sync write."
+             )
+
+         # otherwise use sync
+         if not written:
+             with file(location, "wb") as f:
+                 f.write(log_bytes)


  def _validate_version(ver: int) -> None: