PyPI - inspect-ai - Versions diffs - 0.3.102__py3-none-any.whl → 0.3.103__py3-none-any.whl - Mend

inspect-ai 0.3.102__py3-none-any.whl → 0.3.103__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

inspect_ai/_cli/eval.py CHANGED Viewed

@@ -966,6 +966,7 @@ def eval_exec(
         success, _ = eval_set(**params)
         return success
     else:
+        params["log_header_only"] = (True,)  # cli invocation doesn't need full log
         eval(**params)
         return True

inspect_ai/_eval/eval.py CHANGED Viewed

@@ -105,6 +105,7 @@ def eval(
     log_images: bool | None = None,
     log_buffer: int | None = None,
     log_shared: bool | int | None = None,
+    log_header_only: bool | None = None,
     score: bool = True,
     score_display: bool | None = None,
     **kwargs: Unpack[GenerateConfigArgs],
@@ -181,6 +182,8 @@ def eval(
         log_shared: Sync sample events to log directory so that users on other systems
             can see log updates in realtime (defaults to no syncing). Specify `True`
             to sync every 10 seconds, otherwise an integer to sync every `n` seconds.
+        log_header_only: If `True`, the function should return only log headers rather
+            than full logs with samples (defaults to `False`).
         score: Score output (defaults to True)
         score_display: Show scoring metrics in realtime (defaults to True)
         **kwargs: Model generation options.
@@ -234,6 +237,7 @@ def eval(
                 log_images=log_images,
                 log_buffer=log_buffer,
                 log_shared=log_shared,
+                log_header_only=log_header_only,
                 score=score,
                 score_display=score_display,
                 **kwargs,
@@ -288,6 +292,7 @@ async def eval_async(
     log_images: bool | None = None,
     log_buffer: int | None = None,
     log_shared: bool | int | None = None,
+    log_header_only: bool | None = None,
     score: bool = True,
     score_display: bool | None = None,
     **kwargs: Unpack[GenerateConfigArgs],
@@ -344,7 +349,9 @@ async def eval_async(
         log_buffer: Number of samples to buffer before writing log file.
            If not specified, an appropriate default for the format and filesystem is
            chosen (10 for most all cases, 100 for JSON logs on remote filesystems).
-        log_shared: Indicate that the log directory is shared, which results in additional syncing of realtime log data for Inspect View.
+        log_shared: Indicate that the log directory is shared, which results in additional
+        syncing of realtime log data for Inspect View.
+        log_header_only: If `True`, the function should return only log headers rather than full logs with samples (defaults to `False`).
         score: Score output (defaults to True)
         score_display: Show scoring metrics in realtime (defaults to True)
         **kwargs: Model generation options.
@@ -432,6 +439,9 @@ async def eval_async(
         # resolve log_shared
         log_shared = DEFAULT_LOG_SHARED if log_shared is True else log_shared
+        # resolve header only
+        log_header_only = log_header_only is True
         # validate that --log-shared can't use used with 'json' format
         if log_shared and log_format == JSON_LOG_FORMAT:
             raise PrerequisiteError(
@@ -507,6 +517,7 @@ async def eval_async(
                         eval_config=eval_config,
                         eval_sandbox=sandbox,
                         recorder=recorder,
+                        header_only=log_header_only,
                         epochs_reducer=epochs_reducer,
                         solver=solver,
                         tags=tags,
@@ -532,6 +543,7 @@ async def eval_async(
                 eval_config=eval_config,
                 eval_sandbox=sandbox,
                 recorder=recorder,
+                header_only=log_header_only,
                 epochs_reducer=epochs_reducer,
                 solver=solver,
                 tags=tags,

inspect_ai/_eval/evalset.py CHANGED Viewed

@@ -235,6 +235,7 @@ def eval_set(
             log_images=log_images,
             log_buffer=log_buffer,
             log_shared=log_shared,
+            log_header_only=True,
             score=score,
             **kwargs,
         )

inspect_ai/_eval/run.py CHANGED Viewed

@@ -63,6 +63,7 @@ async def eval_run(
     eval_config: EvalConfig,
     eval_sandbox: SandboxEnvironmentType | None,
     recorder: Recorder,
+    header_only: bool,
     epochs_reducer: list[ScoreReducer] | None = None,
     solver: Solver | SolverSpec | None = None,
     tags: list[str] | None = None,
@@ -212,6 +213,7 @@ async def eval_run(
                     eval_config=task_eval_config,
                     metadata=((metadata or {}) | (task.metadata or {})) or None,
                     recorder=recorder,
+                    header_only=header_only,
                 )
                 await logger.init()

inspect_ai/_eval/task/log.py CHANGED Viewed

@@ -75,6 +75,7 @@ class TaskLogger:
         eval_config: EvalConfig,
         metadata: dict[str, Any] | None,
         recorder: Recorder,
+        header_only: bool,
     ) -> None:
         # determine versions
         git = git_context()
@@ -153,6 +154,7 @@ class TaskLogger:
         # stack recorder and location
         self.recorder = recorder
+        self.header_only = header_only
         # number of samples logged
         self._samples_completed = 0
@@ -238,7 +240,7 @@ class TaskLogger:
     ) -> EvalLog:
         # finish and get log
         log = await self.recorder.log_finish(
-            self.eval, status, stats, results, reductions, error
+            self.eval, status, stats, results, reductions, error, self.header_only
         )
         # cleanup the events db

inspect_ai/log/_recorders/eval.py CHANGED Viewed

@@ -133,6 +133,7 @@ class EvalRecorder(FileRecorder):
         results: EvalResults | None,
         reductions: list[EvalSampleReductions] | None,
         error: EvalError | None = None,
+        header_only: bool = False,
     ) -> EvalLog:
         # get the key and log
         key = self._log_file_key(eval)
@@ -174,7 +175,7 @@ class EvalRecorder(FileRecorder):
         # flush and write the results
         await log.flush()
-        return await log.close()
+        return await log.close(header_only)
     @classmethod
     @override
@@ -321,12 +322,12 @@ class ZipLogFile:
                     # re-open zip file w/ self.temp_file pointer at end
                     self._open()
-    async def close(self) -> EvalLog:
+    async def close(self, header_only: bool) -> EvalLog:
         async with self._lock:
             # read the log from the temp file then close it
             try:
                 self._temp_file.seek(0)
-                return _read_log(self._temp_file, self._file)
+                return _read_log(self._temp_file, self._file, header_only=header_only)
             finally:
                 self._temp_file.close()
                 if self._zip:

inspect_ai/log/_recorders/json.py CHANGED Viewed

@@ -96,6 +96,7 @@ class JSONRecorder(FileRecorder):
         results: EvalResults | None,
         reductions: list[EvalSampleReductions] | None,
         error: EvalError | None = None,
+        header_only: bool = False,
     ) -> EvalLog:
         log = self.data[self._log_file_key(spec)]
         log.data.status = status

inspect_ai/log/_recorders/recorder.py CHANGED Viewed

@@ -46,6 +46,7 @@ class Recorder(abc.ABC):
         results: EvalResults | None,
         reductions: list[EvalSampleReductions] | None,
         error: EvalError | None = None,
+        header_only: bool = False,
     ) -> EvalLog: ...
     @classmethod

{inspect_ai-0.3.102.dist-info → inspect_ai-0.3.103.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: inspect_ai
-Version: 0.3.102
+Version: 0.3.103
 Summary: Framework for large language model evaluations
 Author: UK AI Security Institute
 License: MIT License

{inspect_ai-0.3.102.dist-info → inspect_ai-0.3.103.dist-info}/RECORD RENAMED Viewed

@@ -3,7 +3,7 @@ inspect_ai/__main__.py,sha256=oWX4YwDZDg3GS3-IG0yPGoSEOfSzWihELg7QmrUlxjM,67
 inspect_ai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 inspect_ai/_cli/cache.py,sha256=RVGuBYwwk3c45JfyfcSFJ419etSsv3-Z7AyfQE-Hul0,3912
 inspect_ai/_cli/common.py,sha256=WbAgGbLcxABcWoEWiGCWSbkkxYr2jmL9i0cyseWHmRA,4165
-inspect_ai/_cli/eval.py,sha256=zX1QcvPAi-Vjk8NQUVlG48sa3vowTXAjmv0kQogtGiw,39394
+inspect_ai/_cli/eval.py,sha256=H1UBbStuwX28f0fDf3hs7UF3xqD5U-M9WcS0JyvK4sI,39478
 inspect_ai/_cli/info.py,sha256=QMxaTG9TmzW95EiLrOgkzubvavoR-VHxo3eV7ppmrzI,1789
 inspect_ai/_cli/list.py,sha256=M8mazI8Zuq8Hp99YWKnxQd9UWx1Qi87zfXRzZYAAakk,2459
 inspect_ai/_cli/log.py,sha256=Ko4TLqoy1jD3DoL50EkcQMVc16BzkjYo8qrZBakC4e8,5818
@@ -45,12 +45,12 @@ inspect_ai/_display/textual/widgets/transcript.py,sha256=fmCJwe1EZ7bjeB6DXakQ2l3
 inspect_ai/_display/textual/widgets/vscode.py,sha256=SAIPO8VOkT_CFIfnCP_XxKixojdYXxMNdYU3Z2mq5Ek,1298
 inspect_ai/_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 inspect_ai/_eval/context.py,sha256=mdYinWG2lcYkWLieT42suzUDyaQBVHosbaWTKA6Uu48,1407
-inspect_ai/_eval/eval.py,sha256=mj7Blv-bsd1qgswWWSbA1rphiyQpLEZIxmbQ1hAExC0,43555
-inspect_ai/_eval/evalset.py,sha256=qzv0VFqHEp_9E2G3IwNRLqYD6gdKK3XzAwPJGbiABhU,25039
+inspect_ai/_eval/eval.py,sha256=4qOG1tulHZajfUX5RGFXVnrW5ajel3TdAwlvzAqJmKU,44161
+inspect_ai/_eval/evalset.py,sha256=V_nE7xzKzxlEqC-AZwqbYZ9FWzEfQM0oiLXb1zRZAgw,25073
 inspect_ai/_eval/list.py,sha256=VbZ-2EI6MqrXvCN7VTz21TQSoU5K5_Q0hqhxmj5A_m0,3744
 inspect_ai/_eval/loader.py,sha256=dafv4TlQDqdvzPyrQrBsNiCzhvqjwmcVQzweX-AL1os,24805
 inspect_ai/_eval/registry.py,sha256=8Cm-qyDB6Fthea8DUe-QES9plly_Pf2MUuCgeNQ3fOY,5303
-inspect_ai/_eval/run.py,sha256=PYP3zi5OjPguBkMNS6Nq4KrbEBiSH7ebdBsKHpjYhOE,21231
+inspect_ai/_eval/run.py,sha256=W3dMWHVHTyx_eqE6wV0bjDWol9ZQUON1oQtZHZ2HK6E,21299
 inspect_ai/_eval/score.py,sha256=KodaNhMCE1KV8qS33zj7Q8I0LD080WRCb32tyg1956w,10443
 inspect_ai/_eval/task/__init__.py,sha256=6FvojMW3yo36L7xDacppCHDxt6A8_tzj_ftg5bQ6eNk,199
 inspect_ai/_eval/task/constants.py,sha256=quAKMw-4-3xKd1T_KwXCZvHYoKRXt1ZGuaHbBcWJwnA,72
@@ -58,7 +58,7 @@ inspect_ai/_eval/task/epochs.py,sha256=Ci7T6CQniSOTChv5Im2dCdSDrP-5hq19rV6iJ2uBc
 inspect_ai/_eval/task/error.py,sha256=Vhqinfdf0eIrjn7kUY7-id8Kbdggr-fEFpAJeJrkJ1M,1244
 inspect_ai/_eval/task/generate.py,sha256=yzeGlRUgIut-3OvF0xyx1ZjuJS61nR7thHV8tqv_aTE,2146
 inspect_ai/_eval/task/images.py,sha256=nTzHizlyuPYumPH7gAOBSrNkTwTbAmZ7tKdzN7d_R2k,4035
-inspect_ai/_eval/task/log.py,sha256=g5KACnK_QuOocjhhYgeWsSx6n9cahuX4l6hiYET25qE,11835
+inspect_ai/_eval/task/log.py,sha256=O7cS-QHlJiV8JV-7G4-Eu06qYXAYf-GCCqBOSFikJ30,11919
 inspect_ai/_eval/task/resolved.py,sha256=LBVHEeq9N1fkRObmA2pnDE_l_EuH6n2Dg8-c8yCGT5U,1007
 inspect_ai/_eval/task/results.py,sha256=x4weYRK2XGowfBG3f2msOeZQ_pxh230HTlw6kps33jw,17925
 inspect_ai/_eval/task/run.py,sha256=VdqQnHqP_fWog_Re3L-kxN8MRAU41tU9xqfFILvNN7E,39120
@@ -591,10 +591,10 @@ inspect_ai/log/_tree.py,sha256=C817m_7-66ThyCX5K4nVA7AzYOgLXWlKMdTQ-ueNA-U,3232
 inspect_ai/log/_util.py,sha256=j7jeqDendiCt12U_iaPQj8fLgTA44pk04ZM1tGQdau4,1699
 inspect_ai/log/_recorders/__init__.py,sha256=qMm2y1HOzS499ZTXHOQExSN8PJ-I3LnH35icbP2m4VU,412
 inspect_ai/log/_recorders/create.py,sha256=WB-fms0dBDHlTtTa_a_r0fFc6UPRvQZKZT7d_Inp-EU,1103
-inspect_ai/log/_recorders/eval.py,sha256=tPO0Jh56d4VOPjjqCl1mHEQEkpdfhcC2rvA6BAdWedY,15972
+inspect_ai/log/_recorders/eval.py,sha256=4ZsidUM9td3evoEalPkVZCZYajQhLGRk9Qu3SNLQU4Q,16062
 inspect_ai/log/_recorders/file.py,sha256=aY1aGOwWfoXUD7c_imrcN9rqCFA-6xEjAef0HIPOaBM,3707
-inspect_ai/log/_recorders/json.py,sha256=sF4cc-_TrUgRxngHwj0p7Rsjy67XTbWSH_SRCEqz9RQ,8782
-inspect_ai/log/_recorders/recorder.py,sha256=Fn4lrfUJmxIhxy3hhQzpDjVABtCXQ7wtT0vRykzQ0YA,1787
+inspect_ai/log/_recorders/json.py,sha256=0CxvvlyeUQQph0fsRpgHnfooyCt7E_xnj58s2yeNp_w,8817
+inspect_ai/log/_recorders/recorder.py,sha256=yXv8DXjegZxfKirPZOnfJQoXsl-AKJRL0e9r7S8Jo4s,1822
 inspect_ai/log/_recorders/types.py,sha256=cV7eFpXsbmXVw4r7xw1RdJC6Jltvw9xLXYPbFsYW7Oc,159
 inspect_ai/log/_recorders/buffer/__init__.py,sha256=6DsRdnNl-ic-xJmnBE5i45ZP3eB4yAta9wxi5WFcbqc,367
 inspect_ai/log/_recorders/buffer/buffer.py,sha256=rtLvaX7nSqNrWb-3CeSaOHwJgF1CzRgXFT_I1dDkM1k,945
@@ -758,9 +758,9 @@ inspect_ai/util/_sandbox/docker/internal.py,sha256=c8X8TLrBPOvsfnq5TkMlb_bzTALyc
 inspect_ai/util/_sandbox/docker/prereqs.py,sha256=0j6_OauBBnVlpBleADcZavIAAQZy4WewVjbRn9c0stg,3355
 inspect_ai/util/_sandbox/docker/service.py,sha256=hhHIWH1VDFLwehdGd19aUBD_VKfDO3GCPxpw1HSwVQk,2437
 inspect_ai/util/_sandbox/docker/util.py,sha256=EeInihCNXgUWxaqZ4dNOJd719kXL2_jr63QCoXn68vA,3154
-inspect_ai-0.3.102.dist-info/licenses/LICENSE,sha256=xZPCr8gTiFIerrA_DRpLAbw-UUftnLFsHxKeW-NTtq8,1081
-inspect_ai-0.3.102.dist-info/METADATA,sha256=oeFq80JBy8nfGBlfCzuBmSlBfy4ybvQ1z-E1WB1ItY0,5460
-inspect_ai-0.3.102.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-inspect_ai-0.3.102.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
-inspect_ai-0.3.102.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
-inspect_ai-0.3.102.dist-info/RECORD,,
+inspect_ai-0.3.103.dist-info/licenses/LICENSE,sha256=xZPCr8gTiFIerrA_DRpLAbw-UUftnLFsHxKeW-NTtq8,1081
+inspect_ai-0.3.103.dist-info/METADATA,sha256=IELaF2h46Zjz8Saj1dK6_qqjQPyg_kFej7zvfH7uO8s,5460
+inspect_ai-0.3.103.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+inspect_ai-0.3.103.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
+inspect_ai-0.3.103.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
+inspect_ai-0.3.103.dist-info/RECORD,,

{inspect_ai-0.3.102.dist-info → inspect_ai-0.3.103.dist-info}/WHEEL RENAMED Viewed

File without changes

{inspect_ai-0.3.102.dist-info → inspect_ai-0.3.103.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{inspect_ai-0.3.102.dist-info → inspect_ai-0.3.103.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{inspect_ai-0.3.102.dist-info → inspect_ai-0.3.103.dist-info}/top_level.txt RENAMED Viewed

File without changes

inspect-ai 0.3.102__py3-none-any.whl → 0.3.103__py3-none-any.whl

inspect-ai 0.3.102__py3-none-any.whl → 0.3.103__py3-none-any.whl