inspect-ai 0.3.102__py3-none-any.whl → 0.3.103__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
inspect_ai/_cli/eval.py CHANGED
@@ -966,6 +966,7 @@ def eval_exec(
966
966
  success, _ = eval_set(**params)
967
967
  return success
968
968
  else:
969
+ params["log_header_only"] = True # cli invocation doesn't need full log
969
970
  eval(**params)
970
971
  return True
971
972
 
inspect_ai/_eval/eval.py CHANGED
@@ -105,6 +105,7 @@ def eval(
105
105
  log_images: bool | None = None,
106
106
  log_buffer: int | None = None,
107
107
  log_shared: bool | int | None = None,
108
+ log_header_only: bool | None = None,
108
109
  score: bool = True,
109
110
  score_display: bool | None = None,
110
111
  **kwargs: Unpack[GenerateConfigArgs],
@@ -181,6 +182,8 @@ def eval(
181
182
  log_shared: Sync sample events to log directory so that users on other systems
182
183
  can see log updates in realtime (defaults to no syncing). Specify `True`
183
184
  to sync every 10 seconds, otherwise an integer to sync every `n` seconds.
185
+ log_header_only: If `True`, return only log headers rather
186
+ than full logs with samples (defaults to `False`).
184
187
  score: Score output (defaults to True)
185
188
  score_display: Show scoring metrics in realtime (defaults to True)
186
189
  **kwargs: Model generation options.
@@ -234,6 +237,7 @@ def eval(
234
237
  log_images=log_images,
235
238
  log_buffer=log_buffer,
236
239
  log_shared=log_shared,
240
+ log_header_only=log_header_only,
237
241
  score=score,
238
242
  score_display=score_display,
239
243
  **kwargs,
@@ -288,6 +292,7 @@ async def eval_async(
288
292
  log_images: bool | None = None,
289
293
  log_buffer: int | None = None,
290
294
  log_shared: bool | int | None = None,
295
+ log_header_only: bool | None = None,
291
296
  score: bool = True,
292
297
  score_display: bool | None = None,
293
298
  **kwargs: Unpack[GenerateConfigArgs],
@@ -344,7 +349,9 @@ async def eval_async(
344
349
  log_buffer: Number of samples to buffer before writing log file.
345
350
  If not specified, an appropriate default for the format and filesystem is
346
351
  chosen (10 for most cases, 100 for JSON logs on remote filesystems).
347
- log_shared: Indicate that the log directory is shared, which results in additional syncing of realtime log data for Inspect View.
352
+ log_shared: Indicate that the log directory is shared, which results in additional
353
+ syncing of realtime log data for Inspect View.
354
+ log_header_only: If `True`, return only log headers rather than full logs with samples (defaults to `False`).
348
355
  score: Score output (defaults to True)
349
356
  score_display: Show scoring metrics in realtime (defaults to True)
350
357
  **kwargs: Model generation options.
@@ -432,6 +439,9 @@ async def eval_async(
432
439
  # resolve log_shared
433
440
  log_shared = DEFAULT_LOG_SHARED if log_shared is True else log_shared
434
441
 
442
+ # resolve header only
443
+ log_header_only = log_header_only is True
444
+
435
445
  # validate that --log-shared can't be used with 'json' format
436
446
  if log_shared and log_format == JSON_LOG_FORMAT:
437
447
  raise PrerequisiteError(
@@ -507,6 +517,7 @@ async def eval_async(
507
517
  eval_config=eval_config,
508
518
  eval_sandbox=sandbox,
509
519
  recorder=recorder,
520
+ header_only=log_header_only,
510
521
  epochs_reducer=epochs_reducer,
511
522
  solver=solver,
512
523
  tags=tags,
@@ -532,6 +543,7 @@ async def eval_async(
532
543
  eval_config=eval_config,
533
544
  eval_sandbox=sandbox,
534
545
  recorder=recorder,
546
+ header_only=log_header_only,
535
547
  epochs_reducer=epochs_reducer,
536
548
  solver=solver,
537
549
  tags=tags,
@@ -235,6 +235,7 @@ def eval_set(
235
235
  log_images=log_images,
236
236
  log_buffer=log_buffer,
237
237
  log_shared=log_shared,
238
+ log_header_only=True,
238
239
  score=score,
239
240
  **kwargs,
240
241
  )
inspect_ai/_eval/run.py CHANGED
@@ -63,6 +63,7 @@ async def eval_run(
63
63
  eval_config: EvalConfig,
64
64
  eval_sandbox: SandboxEnvironmentType | None,
65
65
  recorder: Recorder,
66
+ header_only: bool,
66
67
  epochs_reducer: list[ScoreReducer] | None = None,
67
68
  solver: Solver | SolverSpec | None = None,
68
69
  tags: list[str] | None = None,
@@ -212,6 +213,7 @@ async def eval_run(
212
213
  eval_config=task_eval_config,
213
214
  metadata=((metadata or {}) | (task.metadata or {})) or None,
214
215
  recorder=recorder,
216
+ header_only=header_only,
215
217
  )
216
218
  await logger.init()
217
219
 
@@ -75,6 +75,7 @@ class TaskLogger:
75
75
  eval_config: EvalConfig,
76
76
  metadata: dict[str, Any] | None,
77
77
  recorder: Recorder,
78
+ header_only: bool,
78
79
  ) -> None:
79
80
  # determine versions
80
81
  git = git_context()
@@ -153,6 +154,7 @@ class TaskLogger:
153
154
 
154
155
  # stack recorder and location
155
156
  self.recorder = recorder
157
+ self.header_only = header_only
156
158
 
157
159
  # number of samples logged
158
160
  self._samples_completed = 0
@@ -238,7 +240,7 @@ class TaskLogger:
238
240
  ) -> EvalLog:
239
241
  # finish and get log
240
242
  log = await self.recorder.log_finish(
241
- self.eval, status, stats, results, reductions, error
243
+ self.eval, status, stats, results, reductions, error, self.header_only
242
244
  )
243
245
 
244
246
  # cleanup the events db
@@ -133,6 +133,7 @@ class EvalRecorder(FileRecorder):
133
133
  results: EvalResults | None,
134
134
  reductions: list[EvalSampleReductions] | None,
135
135
  error: EvalError | None = None,
136
+ header_only: bool = False,
136
137
  ) -> EvalLog:
137
138
  # get the key and log
138
139
  key = self._log_file_key(eval)
@@ -174,7 +175,7 @@ class EvalRecorder(FileRecorder):
174
175
 
175
176
  # flush and write the results
176
177
  await log.flush()
177
- return await log.close()
178
+ return await log.close(header_only)
178
179
 
179
180
  @classmethod
180
181
  @override
@@ -321,12 +322,12 @@ class ZipLogFile:
321
322
  # re-open zip file w/ self.temp_file pointer at end
322
323
  self._open()
323
324
 
324
- async def close(self) -> EvalLog:
325
+ async def close(self, header_only: bool) -> EvalLog:
325
326
  async with self._lock:
326
327
  # read the log from the temp file then close it
327
328
  try:
328
329
  self._temp_file.seek(0)
329
- return _read_log(self._temp_file, self._file)
330
+ return _read_log(self._temp_file, self._file, header_only=header_only)
330
331
  finally:
331
332
  self._temp_file.close()
332
333
  if self._zip:
@@ -96,6 +96,7 @@ class JSONRecorder(FileRecorder):
96
96
  results: EvalResults | None,
97
97
  reductions: list[EvalSampleReductions] | None,
98
98
  error: EvalError | None = None,
99
+ header_only: bool = False,
99
100
  ) -> EvalLog:
100
101
  log = self.data[self._log_file_key(spec)]
101
102
  log.data.status = status
@@ -46,6 +46,7 @@ class Recorder(abc.ABC):
46
46
  results: EvalResults | None,
47
47
  reductions: list[EvalSampleReductions] | None,
48
48
  error: EvalError | None = None,
49
+ header_only: bool = False,
49
50
  ) -> EvalLog: ...
50
51
 
51
52
  @classmethod
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: inspect_ai
3
- Version: 0.3.102
3
+ Version: 0.3.103
4
4
  Summary: Framework for large language model evaluations
5
5
  Author: UK AI Security Institute
6
6
  License: MIT License
@@ -3,7 +3,7 @@ inspect_ai/__main__.py,sha256=oWX4YwDZDg3GS3-IG0yPGoSEOfSzWihELg7QmrUlxjM,67
3
3
  inspect_ai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  inspect_ai/_cli/cache.py,sha256=RVGuBYwwk3c45JfyfcSFJ419etSsv3-Z7AyfQE-Hul0,3912
5
5
  inspect_ai/_cli/common.py,sha256=WbAgGbLcxABcWoEWiGCWSbkkxYr2jmL9i0cyseWHmRA,4165
6
- inspect_ai/_cli/eval.py,sha256=zX1QcvPAi-Vjk8NQUVlG48sa3vowTXAjmv0kQogtGiw,39394
6
+ inspect_ai/_cli/eval.py,sha256=H1UBbStuwX28f0fDf3hs7UF3xqD5U-M9WcS0JyvK4sI,39478
7
7
  inspect_ai/_cli/info.py,sha256=QMxaTG9TmzW95EiLrOgkzubvavoR-VHxo3eV7ppmrzI,1789
8
8
  inspect_ai/_cli/list.py,sha256=M8mazI8Zuq8Hp99YWKnxQd9UWx1Qi87zfXRzZYAAakk,2459
9
9
  inspect_ai/_cli/log.py,sha256=Ko4TLqoy1jD3DoL50EkcQMVc16BzkjYo8qrZBakC4e8,5818
@@ -45,12 +45,12 @@ inspect_ai/_display/textual/widgets/transcript.py,sha256=fmCJwe1EZ7bjeB6DXakQ2l3
45
45
  inspect_ai/_display/textual/widgets/vscode.py,sha256=SAIPO8VOkT_CFIfnCP_XxKixojdYXxMNdYU3Z2mq5Ek,1298
46
46
  inspect_ai/_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
47
  inspect_ai/_eval/context.py,sha256=mdYinWG2lcYkWLieT42suzUDyaQBVHosbaWTKA6Uu48,1407
48
- inspect_ai/_eval/eval.py,sha256=mj7Blv-bsd1qgswWWSbA1rphiyQpLEZIxmbQ1hAExC0,43555
49
- inspect_ai/_eval/evalset.py,sha256=qzv0VFqHEp_9E2G3IwNRLqYD6gdKK3XzAwPJGbiABhU,25039
48
+ inspect_ai/_eval/eval.py,sha256=4qOG1tulHZajfUX5RGFXVnrW5ajel3TdAwlvzAqJmKU,44161
49
+ inspect_ai/_eval/evalset.py,sha256=V_nE7xzKzxlEqC-AZwqbYZ9FWzEfQM0oiLXb1zRZAgw,25073
50
50
  inspect_ai/_eval/list.py,sha256=VbZ-2EI6MqrXvCN7VTz21TQSoU5K5_Q0hqhxmj5A_m0,3744
51
51
  inspect_ai/_eval/loader.py,sha256=dafv4TlQDqdvzPyrQrBsNiCzhvqjwmcVQzweX-AL1os,24805
52
52
  inspect_ai/_eval/registry.py,sha256=8Cm-qyDB6Fthea8DUe-QES9plly_Pf2MUuCgeNQ3fOY,5303
53
- inspect_ai/_eval/run.py,sha256=PYP3zi5OjPguBkMNS6Nq4KrbEBiSH7ebdBsKHpjYhOE,21231
53
+ inspect_ai/_eval/run.py,sha256=W3dMWHVHTyx_eqE6wV0bjDWol9ZQUON1oQtZHZ2HK6E,21299
54
54
  inspect_ai/_eval/score.py,sha256=KodaNhMCE1KV8qS33zj7Q8I0LD080WRCb32tyg1956w,10443
55
55
  inspect_ai/_eval/task/__init__.py,sha256=6FvojMW3yo36L7xDacppCHDxt6A8_tzj_ftg5bQ6eNk,199
56
56
  inspect_ai/_eval/task/constants.py,sha256=quAKMw-4-3xKd1T_KwXCZvHYoKRXt1ZGuaHbBcWJwnA,72
@@ -58,7 +58,7 @@ inspect_ai/_eval/task/epochs.py,sha256=Ci7T6CQniSOTChv5Im2dCdSDrP-5hq19rV6iJ2uBc
58
58
  inspect_ai/_eval/task/error.py,sha256=Vhqinfdf0eIrjn7kUY7-id8Kbdggr-fEFpAJeJrkJ1M,1244
59
59
  inspect_ai/_eval/task/generate.py,sha256=yzeGlRUgIut-3OvF0xyx1ZjuJS61nR7thHV8tqv_aTE,2146
60
60
  inspect_ai/_eval/task/images.py,sha256=nTzHizlyuPYumPH7gAOBSrNkTwTbAmZ7tKdzN7d_R2k,4035
61
- inspect_ai/_eval/task/log.py,sha256=g5KACnK_QuOocjhhYgeWsSx6n9cahuX4l6hiYET25qE,11835
61
+ inspect_ai/_eval/task/log.py,sha256=O7cS-QHlJiV8JV-7G4-Eu06qYXAYf-GCCqBOSFikJ30,11919
62
62
  inspect_ai/_eval/task/resolved.py,sha256=LBVHEeq9N1fkRObmA2pnDE_l_EuH6n2Dg8-c8yCGT5U,1007
63
63
  inspect_ai/_eval/task/results.py,sha256=x4weYRK2XGowfBG3f2msOeZQ_pxh230HTlw6kps33jw,17925
64
64
  inspect_ai/_eval/task/run.py,sha256=VdqQnHqP_fWog_Re3L-kxN8MRAU41tU9xqfFILvNN7E,39120
@@ -591,10 +591,10 @@ inspect_ai/log/_tree.py,sha256=C817m_7-66ThyCX5K4nVA7AzYOgLXWlKMdTQ-ueNA-U,3232
591
591
  inspect_ai/log/_util.py,sha256=j7jeqDendiCt12U_iaPQj8fLgTA44pk04ZM1tGQdau4,1699
592
592
  inspect_ai/log/_recorders/__init__.py,sha256=qMm2y1HOzS499ZTXHOQExSN8PJ-I3LnH35icbP2m4VU,412
593
593
  inspect_ai/log/_recorders/create.py,sha256=WB-fms0dBDHlTtTa_a_r0fFc6UPRvQZKZT7d_Inp-EU,1103
594
- inspect_ai/log/_recorders/eval.py,sha256=tPO0Jh56d4VOPjjqCl1mHEQEkpdfhcC2rvA6BAdWedY,15972
594
+ inspect_ai/log/_recorders/eval.py,sha256=4ZsidUM9td3evoEalPkVZCZYajQhLGRk9Qu3SNLQU4Q,16062
595
595
  inspect_ai/log/_recorders/file.py,sha256=aY1aGOwWfoXUD7c_imrcN9rqCFA-6xEjAef0HIPOaBM,3707
596
- inspect_ai/log/_recorders/json.py,sha256=sF4cc-_TrUgRxngHwj0p7Rsjy67XTbWSH_SRCEqz9RQ,8782
597
- inspect_ai/log/_recorders/recorder.py,sha256=Fn4lrfUJmxIhxy3hhQzpDjVABtCXQ7wtT0vRykzQ0YA,1787
596
+ inspect_ai/log/_recorders/json.py,sha256=0CxvvlyeUQQph0fsRpgHnfooyCt7E_xnj58s2yeNp_w,8817
597
+ inspect_ai/log/_recorders/recorder.py,sha256=yXv8DXjegZxfKirPZOnfJQoXsl-AKJRL0e9r7S8Jo4s,1822
598
598
  inspect_ai/log/_recorders/types.py,sha256=cV7eFpXsbmXVw4r7xw1RdJC6Jltvw9xLXYPbFsYW7Oc,159
599
599
  inspect_ai/log/_recorders/buffer/__init__.py,sha256=6DsRdnNl-ic-xJmnBE5i45ZP3eB4yAta9wxi5WFcbqc,367
600
600
  inspect_ai/log/_recorders/buffer/buffer.py,sha256=rtLvaX7nSqNrWb-3CeSaOHwJgF1CzRgXFT_I1dDkM1k,945
@@ -758,9 +758,9 @@ inspect_ai/util/_sandbox/docker/internal.py,sha256=c8X8TLrBPOvsfnq5TkMlb_bzTALyc
758
758
  inspect_ai/util/_sandbox/docker/prereqs.py,sha256=0j6_OauBBnVlpBleADcZavIAAQZy4WewVjbRn9c0stg,3355
759
759
  inspect_ai/util/_sandbox/docker/service.py,sha256=hhHIWH1VDFLwehdGd19aUBD_VKfDO3GCPxpw1HSwVQk,2437
760
760
  inspect_ai/util/_sandbox/docker/util.py,sha256=EeInihCNXgUWxaqZ4dNOJd719kXL2_jr63QCoXn68vA,3154
761
- inspect_ai-0.3.102.dist-info/licenses/LICENSE,sha256=xZPCr8gTiFIerrA_DRpLAbw-UUftnLFsHxKeW-NTtq8,1081
762
- inspect_ai-0.3.102.dist-info/METADATA,sha256=oeFq80JBy8nfGBlfCzuBmSlBfy4ybvQ1z-E1WB1ItY0,5460
763
- inspect_ai-0.3.102.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
764
- inspect_ai-0.3.102.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
765
- inspect_ai-0.3.102.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
766
- inspect_ai-0.3.102.dist-info/RECORD,,
761
+ inspect_ai-0.3.103.dist-info/licenses/LICENSE,sha256=xZPCr8gTiFIerrA_DRpLAbw-UUftnLFsHxKeW-NTtq8,1081
762
+ inspect_ai-0.3.103.dist-info/METADATA,sha256=IELaF2h46Zjz8Saj1dK6_qqjQPyg_kFej7zvfH7uO8s,5460
763
+ inspect_ai-0.3.103.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
764
+ inspect_ai-0.3.103.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
765
+ inspect_ai-0.3.103.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
766
+ inspect_ai-0.3.103.dist-info/RECORD,,