inspect-ai 0.3.102__py3-none-any.whl → 0.3.103__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +1 -0
- inspect_ai/_eval/eval.py +13 -1
- inspect_ai/_eval/evalset.py +1 -0
- inspect_ai/_eval/run.py +2 -0
- inspect_ai/_eval/task/log.py +3 -1
- inspect_ai/log/_recorders/eval.py +4 -3
- inspect_ai/log/_recorders/json.py +1 -0
- inspect_ai/log/_recorders/recorder.py +1 -0
- {inspect_ai-0.3.102.dist-info → inspect_ai-0.3.103.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.102.dist-info → inspect_ai-0.3.103.dist-info}/RECORD +14 -14
- {inspect_ai-0.3.102.dist-info → inspect_ai-0.3.103.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.102.dist-info → inspect_ai-0.3.103.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.102.dist-info → inspect_ai-0.3.103.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.102.dist-info → inspect_ai-0.3.103.dist-info}/top_level.txt +0 -0
inspect_ai/_cli/eval.py
CHANGED
inspect_ai/_eval/eval.py
CHANGED
@@ -105,6 +105,7 @@ def eval(
|
|
105
105
|
log_images: bool | None = None,
|
106
106
|
log_buffer: int | None = None,
|
107
107
|
log_shared: bool | int | None = None,
|
108
|
+
log_header_only: bool | None = None,
|
108
109
|
score: bool = True,
|
109
110
|
score_display: bool | None = None,
|
110
111
|
**kwargs: Unpack[GenerateConfigArgs],
|
@@ -181,6 +182,8 @@ def eval(
|
|
181
182
|
log_shared: Sync sample events to log directory so that users on other systems
|
182
183
|
can see log updates in realtime (defaults to no syncing). Specify `True`
|
183
184
|
to sync every 10 seconds, otherwise an integer to sync every `n` seconds.
|
185
|
+
log_header_only: If `True`, the function should return only log headers rather
|
186
|
+
than full logs with samples (defaults to `False`).
|
184
187
|
score: Score output (defaults to True)
|
185
188
|
score_display: Show scoring metrics in realtime (defaults to True)
|
186
189
|
**kwargs: Model generation options.
|
@@ -234,6 +237,7 @@ def eval(
|
|
234
237
|
log_images=log_images,
|
235
238
|
log_buffer=log_buffer,
|
236
239
|
log_shared=log_shared,
|
240
|
+
log_header_only=log_header_only,
|
237
241
|
score=score,
|
238
242
|
score_display=score_display,
|
239
243
|
**kwargs,
|
@@ -288,6 +292,7 @@ async def eval_async(
|
|
288
292
|
log_images: bool | None = None,
|
289
293
|
log_buffer: int | None = None,
|
290
294
|
log_shared: bool | int | None = None,
|
295
|
+
log_header_only: bool | None = None,
|
291
296
|
score: bool = True,
|
292
297
|
score_display: bool | None = None,
|
293
298
|
**kwargs: Unpack[GenerateConfigArgs],
|
@@ -344,7 +349,9 @@ async def eval_async(
|
|
344
349
|
log_buffer: Number of samples to buffer before writing log file.
|
345
350
|
If not specified, an appropriate default for the format and filesystem is
|
346
351
|
chosen (10 for almost all cases, 100 for JSON logs on remote filesystems).
|
347
|
-
log_shared: Indicate that the log directory is shared, which results in additional
|
352
|
+
log_shared: Indicate that the log directory is shared, which results in additional
|
353
|
+
syncing of realtime log data for Inspect View.
|
354
|
+
log_header_only: If `True`, the function should return only log headers rather than full logs with samples (defaults to `False`).
|
348
355
|
score: Score output (defaults to True)
|
349
356
|
score_display: Show scoring metrics in realtime (defaults to True)
|
350
357
|
**kwargs: Model generation options.
|
@@ -432,6 +439,9 @@ async def eval_async(
|
|
432
439
|
# resolve log_shared
|
433
440
|
log_shared = DEFAULT_LOG_SHARED if log_shared is True else log_shared
|
434
441
|
|
442
|
+
# resolve header only
|
443
|
+
log_header_only = log_header_only is True
|
444
|
+
|
435
445
|
# validate that --log-shared can't be used with 'json' format
|
436
446
|
if log_shared and log_format == JSON_LOG_FORMAT:
|
437
447
|
raise PrerequisiteError(
|
@@ -507,6 +517,7 @@ async def eval_async(
|
|
507
517
|
eval_config=eval_config,
|
508
518
|
eval_sandbox=sandbox,
|
509
519
|
recorder=recorder,
|
520
|
+
header_only=log_header_only,
|
510
521
|
epochs_reducer=epochs_reducer,
|
511
522
|
solver=solver,
|
512
523
|
tags=tags,
|
@@ -532,6 +543,7 @@ async def eval_async(
|
|
532
543
|
eval_config=eval_config,
|
533
544
|
eval_sandbox=sandbox,
|
534
545
|
recorder=recorder,
|
546
|
+
header_only=log_header_only,
|
535
547
|
epochs_reducer=epochs_reducer,
|
536
548
|
solver=solver,
|
537
549
|
tags=tags,
|
inspect_ai/_eval/evalset.py
CHANGED
inspect_ai/_eval/run.py
CHANGED
@@ -63,6 +63,7 @@ async def eval_run(
|
|
63
63
|
eval_config: EvalConfig,
|
64
64
|
eval_sandbox: SandboxEnvironmentType | None,
|
65
65
|
recorder: Recorder,
|
66
|
+
header_only: bool,
|
66
67
|
epochs_reducer: list[ScoreReducer] | None = None,
|
67
68
|
solver: Solver | SolverSpec | None = None,
|
68
69
|
tags: list[str] | None = None,
|
@@ -212,6 +213,7 @@ async def eval_run(
|
|
212
213
|
eval_config=task_eval_config,
|
213
214
|
metadata=((metadata or {}) | (task.metadata or {})) or None,
|
214
215
|
recorder=recorder,
|
216
|
+
header_only=header_only,
|
215
217
|
)
|
216
218
|
await logger.init()
|
217
219
|
|
inspect_ai/_eval/task/log.py
CHANGED
@@ -75,6 +75,7 @@ class TaskLogger:
|
|
75
75
|
eval_config: EvalConfig,
|
76
76
|
metadata: dict[str, Any] | None,
|
77
77
|
recorder: Recorder,
|
78
|
+
header_only: bool,
|
78
79
|
) -> None:
|
79
80
|
# determine versions
|
80
81
|
git = git_context()
|
@@ -153,6 +154,7 @@ class TaskLogger:
|
|
153
154
|
|
154
155
|
# stack recorder and location
|
155
156
|
self.recorder = recorder
|
157
|
+
self.header_only = header_only
|
156
158
|
|
157
159
|
# number of samples logged
|
158
160
|
self._samples_completed = 0
|
@@ -238,7 +240,7 @@ class TaskLogger:
|
|
238
240
|
) -> EvalLog:
|
239
241
|
# finish and get log
|
240
242
|
log = await self.recorder.log_finish(
|
241
|
-
self.eval, status, stats, results, reductions, error
|
243
|
+
self.eval, status, stats, results, reductions, error, self.header_only
|
242
244
|
)
|
243
245
|
|
244
246
|
# cleanup the events db
|
@@ -133,6 +133,7 @@ class EvalRecorder(FileRecorder):
|
|
133
133
|
results: EvalResults | None,
|
134
134
|
reductions: list[EvalSampleReductions] | None,
|
135
135
|
error: EvalError | None = None,
|
136
|
+
header_only: bool = False,
|
136
137
|
) -> EvalLog:
|
137
138
|
# get the key and log
|
138
139
|
key = self._log_file_key(eval)
|
@@ -174,7 +175,7 @@ class EvalRecorder(FileRecorder):
|
|
174
175
|
|
175
176
|
# flush and write the results
|
176
177
|
await log.flush()
|
177
|
-
return await log.close()
|
178
|
+
return await log.close(header_only)
|
178
179
|
|
179
180
|
@classmethod
|
180
181
|
@override
|
@@ -321,12 +322,12 @@ class ZipLogFile:
|
|
321
322
|
# re-open zip file w/ self.temp_file pointer at end
|
322
323
|
self._open()
|
323
324
|
|
324
|
-
async def close(self) -> EvalLog:
|
325
|
+
async def close(self, header_only: bool) -> EvalLog:
|
325
326
|
async with self._lock:
|
326
327
|
# read the log from the temp file then close it
|
327
328
|
try:
|
328
329
|
self._temp_file.seek(0)
|
329
|
-
return _read_log(self._temp_file, self._file)
|
330
|
+
return _read_log(self._temp_file, self._file, header_only=header_only)
|
330
331
|
finally:
|
331
332
|
self._temp_file.close()
|
332
333
|
if self._zip:
|
@@ -96,6 +96,7 @@ class JSONRecorder(FileRecorder):
|
|
96
96
|
results: EvalResults | None,
|
97
97
|
reductions: list[EvalSampleReductions] | None,
|
98
98
|
error: EvalError | None = None,
|
99
|
+
header_only: bool = False,
|
99
100
|
) -> EvalLog:
|
100
101
|
log = self.data[self._log_file_key(spec)]
|
101
102
|
log.data.status = status
|
@@ -3,7 +3,7 @@ inspect_ai/__main__.py,sha256=oWX4YwDZDg3GS3-IG0yPGoSEOfSzWihELg7QmrUlxjM,67
|
|
3
3
|
inspect_ai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
inspect_ai/_cli/cache.py,sha256=RVGuBYwwk3c45JfyfcSFJ419etSsv3-Z7AyfQE-Hul0,3912
|
5
5
|
inspect_ai/_cli/common.py,sha256=WbAgGbLcxABcWoEWiGCWSbkkxYr2jmL9i0cyseWHmRA,4165
|
6
|
-
inspect_ai/_cli/eval.py,sha256=
|
6
|
+
inspect_ai/_cli/eval.py,sha256=H1UBbStuwX28f0fDf3hs7UF3xqD5U-M9WcS0JyvK4sI,39478
|
7
7
|
inspect_ai/_cli/info.py,sha256=QMxaTG9TmzW95EiLrOgkzubvavoR-VHxo3eV7ppmrzI,1789
|
8
8
|
inspect_ai/_cli/list.py,sha256=M8mazI8Zuq8Hp99YWKnxQd9UWx1Qi87zfXRzZYAAakk,2459
|
9
9
|
inspect_ai/_cli/log.py,sha256=Ko4TLqoy1jD3DoL50EkcQMVc16BzkjYo8qrZBakC4e8,5818
|
@@ -45,12 +45,12 @@ inspect_ai/_display/textual/widgets/transcript.py,sha256=fmCJwe1EZ7bjeB6DXakQ2l3
|
|
45
45
|
inspect_ai/_display/textual/widgets/vscode.py,sha256=SAIPO8VOkT_CFIfnCP_XxKixojdYXxMNdYU3Z2mq5Ek,1298
|
46
46
|
inspect_ai/_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
47
47
|
inspect_ai/_eval/context.py,sha256=mdYinWG2lcYkWLieT42suzUDyaQBVHosbaWTKA6Uu48,1407
|
48
|
-
inspect_ai/_eval/eval.py,sha256=
|
49
|
-
inspect_ai/_eval/evalset.py,sha256=
|
48
|
+
inspect_ai/_eval/eval.py,sha256=4qOG1tulHZajfUX5RGFXVnrW5ajel3TdAwlvzAqJmKU,44161
|
49
|
+
inspect_ai/_eval/evalset.py,sha256=V_nE7xzKzxlEqC-AZwqbYZ9FWzEfQM0oiLXb1zRZAgw,25073
|
50
50
|
inspect_ai/_eval/list.py,sha256=VbZ-2EI6MqrXvCN7VTz21TQSoU5K5_Q0hqhxmj5A_m0,3744
|
51
51
|
inspect_ai/_eval/loader.py,sha256=dafv4TlQDqdvzPyrQrBsNiCzhvqjwmcVQzweX-AL1os,24805
|
52
52
|
inspect_ai/_eval/registry.py,sha256=8Cm-qyDB6Fthea8DUe-QES9plly_Pf2MUuCgeNQ3fOY,5303
|
53
|
-
inspect_ai/_eval/run.py,sha256=
|
53
|
+
inspect_ai/_eval/run.py,sha256=W3dMWHVHTyx_eqE6wV0bjDWol9ZQUON1oQtZHZ2HK6E,21299
|
54
54
|
inspect_ai/_eval/score.py,sha256=KodaNhMCE1KV8qS33zj7Q8I0LD080WRCb32tyg1956w,10443
|
55
55
|
inspect_ai/_eval/task/__init__.py,sha256=6FvojMW3yo36L7xDacppCHDxt6A8_tzj_ftg5bQ6eNk,199
|
56
56
|
inspect_ai/_eval/task/constants.py,sha256=quAKMw-4-3xKd1T_KwXCZvHYoKRXt1ZGuaHbBcWJwnA,72
|
@@ -58,7 +58,7 @@ inspect_ai/_eval/task/epochs.py,sha256=Ci7T6CQniSOTChv5Im2dCdSDrP-5hq19rV6iJ2uBc
|
|
58
58
|
inspect_ai/_eval/task/error.py,sha256=Vhqinfdf0eIrjn7kUY7-id8Kbdggr-fEFpAJeJrkJ1M,1244
|
59
59
|
inspect_ai/_eval/task/generate.py,sha256=yzeGlRUgIut-3OvF0xyx1ZjuJS61nR7thHV8tqv_aTE,2146
|
60
60
|
inspect_ai/_eval/task/images.py,sha256=nTzHizlyuPYumPH7gAOBSrNkTwTbAmZ7tKdzN7d_R2k,4035
|
61
|
-
inspect_ai/_eval/task/log.py,sha256=
|
61
|
+
inspect_ai/_eval/task/log.py,sha256=O7cS-QHlJiV8JV-7G4-Eu06qYXAYf-GCCqBOSFikJ30,11919
|
62
62
|
inspect_ai/_eval/task/resolved.py,sha256=LBVHEeq9N1fkRObmA2pnDE_l_EuH6n2Dg8-c8yCGT5U,1007
|
63
63
|
inspect_ai/_eval/task/results.py,sha256=x4weYRK2XGowfBG3f2msOeZQ_pxh230HTlw6kps33jw,17925
|
64
64
|
inspect_ai/_eval/task/run.py,sha256=VdqQnHqP_fWog_Re3L-kxN8MRAU41tU9xqfFILvNN7E,39120
|
@@ -591,10 +591,10 @@ inspect_ai/log/_tree.py,sha256=C817m_7-66ThyCX5K4nVA7AzYOgLXWlKMdTQ-ueNA-U,3232
|
|
591
591
|
inspect_ai/log/_util.py,sha256=j7jeqDendiCt12U_iaPQj8fLgTA44pk04ZM1tGQdau4,1699
|
592
592
|
inspect_ai/log/_recorders/__init__.py,sha256=qMm2y1HOzS499ZTXHOQExSN8PJ-I3LnH35icbP2m4VU,412
|
593
593
|
inspect_ai/log/_recorders/create.py,sha256=WB-fms0dBDHlTtTa_a_r0fFc6UPRvQZKZT7d_Inp-EU,1103
|
594
|
-
inspect_ai/log/_recorders/eval.py,sha256=
|
594
|
+
inspect_ai/log/_recorders/eval.py,sha256=4ZsidUM9td3evoEalPkVZCZYajQhLGRk9Qu3SNLQU4Q,16062
|
595
595
|
inspect_ai/log/_recorders/file.py,sha256=aY1aGOwWfoXUD7c_imrcN9rqCFA-6xEjAef0HIPOaBM,3707
|
596
|
-
inspect_ai/log/_recorders/json.py,sha256=
|
597
|
-
inspect_ai/log/_recorders/recorder.py,sha256=
|
596
|
+
inspect_ai/log/_recorders/json.py,sha256=0CxvvlyeUQQph0fsRpgHnfooyCt7E_xnj58s2yeNp_w,8817
|
597
|
+
inspect_ai/log/_recorders/recorder.py,sha256=yXv8DXjegZxfKirPZOnfJQoXsl-AKJRL0e9r7S8Jo4s,1822
|
598
598
|
inspect_ai/log/_recorders/types.py,sha256=cV7eFpXsbmXVw4r7xw1RdJC6Jltvw9xLXYPbFsYW7Oc,159
|
599
599
|
inspect_ai/log/_recorders/buffer/__init__.py,sha256=6DsRdnNl-ic-xJmnBE5i45ZP3eB4yAta9wxi5WFcbqc,367
|
600
600
|
inspect_ai/log/_recorders/buffer/buffer.py,sha256=rtLvaX7nSqNrWb-3CeSaOHwJgF1CzRgXFT_I1dDkM1k,945
|
@@ -758,9 +758,9 @@ inspect_ai/util/_sandbox/docker/internal.py,sha256=c8X8TLrBPOvsfnq5TkMlb_bzTALyc
|
|
758
758
|
inspect_ai/util/_sandbox/docker/prereqs.py,sha256=0j6_OauBBnVlpBleADcZavIAAQZy4WewVjbRn9c0stg,3355
|
759
759
|
inspect_ai/util/_sandbox/docker/service.py,sha256=hhHIWH1VDFLwehdGd19aUBD_VKfDO3GCPxpw1HSwVQk,2437
|
760
760
|
inspect_ai/util/_sandbox/docker/util.py,sha256=EeInihCNXgUWxaqZ4dNOJd719kXL2_jr63QCoXn68vA,3154
|
761
|
-
inspect_ai-0.3.
|
762
|
-
inspect_ai-0.3.
|
763
|
-
inspect_ai-0.3.
|
764
|
-
inspect_ai-0.3.
|
765
|
-
inspect_ai-0.3.
|
766
|
-
inspect_ai-0.3.
|
761
|
+
inspect_ai-0.3.103.dist-info/licenses/LICENSE,sha256=xZPCr8gTiFIerrA_DRpLAbw-UUftnLFsHxKeW-NTtq8,1081
|
762
|
+
inspect_ai-0.3.103.dist-info/METADATA,sha256=IELaF2h46Zjz8Saj1dK6_qqjQPyg_kFej7zvfH7uO8s,5460
|
763
|
+
inspect_ai-0.3.103.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
764
|
+
inspect_ai-0.3.103.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
|
765
|
+
inspect_ai-0.3.103.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
|
766
|
+
inspect_ai-0.3.103.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|