PyPI - inspect-ai - Versions diffs - 0.3.101__py3-none-any.whl → 0.3.102__py3-none-any.whl - Mend

inspect-ai 0.3.101__py3-none-any.whl → 0.3.102__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

inspect_ai/_cli/eval.py CHANGED Viewed

@@ -949,7 +949,6 @@ def eval_exec(
             log_images=log_images,
             log_buffer=log_buffer,
             log_shared=log_shared,
-            log_header_only=True,  # cli invocation doesn't need full log
             score=score,
             score_display=score_display,
         )

inspect_ai/_eval/eval.py CHANGED Viewed

@@ -105,7 +105,6 @@ def eval(
     log_images: bool | None = None,
     log_buffer: int | None = None,
     log_shared: bool | int | None = None,
-    log_header_only: bool | None = None,
     score: bool = True,
     score_display: bool | None = None,
     **kwargs: Unpack[GenerateConfigArgs],
@@ -182,8 +181,6 @@ def eval(
         log_shared: Sync sample events to log directory so that users on other systems
             can see log updates in realtime (defaults to no syncing). Specify `True`
             to sync every 10 seconds, otherwise an integer to sync every `n` seconds.
-        log_header_only: If `True`, the function should return only log headers rather
-            than full logs with samples (defaults to `False`).
         score: Score output (defaults to True)
         score_display: Show scoring metrics in realtime (defaults to True)
         **kwargs: Model generation options.
@@ -237,7 +234,6 @@ def eval(
                 log_images=log_images,
                 log_buffer=log_buffer,
                 log_shared=log_shared,
-                log_header_only=log_header_only,
                 score=score,
                 score_display=score_display,
                 **kwargs,
@@ -292,7 +288,6 @@ async def eval_async(
     log_images: bool | None = None,
     log_buffer: int | None = None,
     log_shared: bool | int | None = None,
-    log_header_only: bool | None = None,
     score: bool = True,
     score_display: bool | None = None,
     **kwargs: Unpack[GenerateConfigArgs],
@@ -349,9 +344,7 @@ async def eval_async(
         log_buffer: Number of samples to buffer before writing log file.
            If not specified, an appropriate default for the format and filesystem is
            chosen (10 for most all cases, 100 for JSON logs on remote filesystems).
-        log_shared: Indicate that the log directory is shared, which results in additional
-        syncing of realtime log data for Inspect View.
-        log_header_only: If `True`, the function should return only log headers rather than full logs with samples (defaults to `False`).
+        log_shared: Indicate that the log directory is shared, which results in additional syncing of realtime log data for Inspect View.
         score: Score output (defaults to True)
         score_display: Show scoring metrics in realtime (defaults to True)
         **kwargs: Model generation options.
@@ -439,9 +432,6 @@ async def eval_async(
         # resolve log_shared
         log_shared = DEFAULT_LOG_SHARED if log_shared is True else log_shared
-        # resolve header only
-        log_header_only = log_header_only is True
         # validate that --log-shared can't use used with 'json' format
         if log_shared and log_format == JSON_LOG_FORMAT:
             raise PrerequisiteError(
@@ -517,7 +507,6 @@ async def eval_async(
                         eval_config=eval_config,
                         eval_sandbox=sandbox,
                         recorder=recorder,
-                        header_only=log_header_only,
                         epochs_reducer=epochs_reducer,
                         solver=solver,
                         tags=tags,
@@ -543,7 +532,6 @@ async def eval_async(
                 eval_config=eval_config,
                 eval_sandbox=sandbox,
                 recorder=recorder,
-                header_only=log_header_only,
                 epochs_reducer=epochs_reducer,
                 solver=solver,
                 tags=tags,

inspect_ai/_eval/evalset.py CHANGED Viewed

@@ -235,7 +235,6 @@ def eval_set(
             log_images=log_images,
             log_buffer=log_buffer,
             log_shared=log_shared,
-            log_header_only=True,
             score=score,
             **kwargs,
         )

inspect_ai/_eval/run.py CHANGED Viewed

@@ -63,7 +63,6 @@ async def eval_run(
     eval_config: EvalConfig,
     eval_sandbox: SandboxEnvironmentType | None,
     recorder: Recorder,
-    header_only: bool,
     epochs_reducer: list[ScoreReducer] | None = None,
     solver: Solver | SolverSpec | None = None,
     tags: list[str] | None = None,
@@ -213,7 +212,6 @@ async def eval_run(
                     eval_config=task_eval_config,
                     metadata=((metadata or {}) | (task.metadata or {})) or None,
                     recorder=recorder,
-                    header_only=header_only,
                 )
                 await logger.init()

inspect_ai/_eval/task/log.py CHANGED Viewed

@@ -75,7 +75,6 @@ class TaskLogger:
         eval_config: EvalConfig,
         metadata: dict[str, Any] | None,
         recorder: Recorder,
-        header_only: bool,
     ) -> None:
         # determine versions
         git = git_context()
@@ -154,7 +153,6 @@ class TaskLogger:
         # stack recorder and location
         self.recorder = recorder
-        self.header_only = header_only
         # number of samples logged
         self._samples_completed = 0
@@ -240,7 +238,7 @@ class TaskLogger:
     ) -> EvalLog:
         # finish and get log
         log = await self.recorder.log_finish(
-            self.eval, status, stats, results, reductions, error, self.header_only
+            self.eval, status, stats, results, reductions, error
         )
         # cleanup the events db

inspect_ai/log/_recorders/eval.py CHANGED Viewed

@@ -133,7 +133,6 @@ class EvalRecorder(FileRecorder):
         results: EvalResults | None,
         reductions: list[EvalSampleReductions] | None,
         error: EvalError | None = None,
-        header_only: bool = False,
     ) -> EvalLog:
         # get the key and log
         key = self._log_file_key(eval)
@@ -175,7 +174,7 @@ class EvalRecorder(FileRecorder):
         # flush and write the results
         await log.flush()
-        return await log.close(header_only)
+        return await log.close()
     @classmethod
     @override
@@ -322,12 +321,12 @@ class ZipLogFile:
                     # re-open zip file w/ self.temp_file pointer at end
                     self._open()
-    async def close(self, header_only: bool) -> EvalLog:
+    async def close(self) -> EvalLog:
         async with self._lock:
             # read the log from the temp file then close it
             try:
                 self._temp_file.seek(0)
-                return _read_log(self._temp_file, self._file, header_only=header_only)
+                return _read_log(self._temp_file, self._file)
             finally:
                 self._temp_file.close()
                 if self._zip:

inspect_ai/log/_recorders/json.py CHANGED Viewed

@@ -96,7 +96,6 @@ class JSONRecorder(FileRecorder):
         results: EvalResults | None,
         reductions: list[EvalSampleReductions] | None,
         error: EvalError | None = None,
-        header_only: bool = False,
     ) -> EvalLog:
         log = self.data[self._log_file_key(spec)]
         log.data.status = status

inspect_ai/log/_recorders/recorder.py CHANGED Viewed

@@ -46,7 +46,6 @@ class Recorder(abc.ABC):
         results: EvalResults | None,
         reductions: list[EvalSampleReductions] | None,
         error: EvalError | None = None,
-        header_only: bool = False,
     ) -> EvalLog: ...
     @classmethod

inspect_ai/model/_openai.py CHANGED Viewed

@@ -98,6 +98,10 @@ def is_computer_use_preview(name: str) -> bool:
     return "computer-use-preview" in name
+def is_codex(name: str) -> bool:
+    return "codex" in name
 def is_gpt(name: str) -> bool:
     return "gpt" in name

inspect_ai/model/_providers/openai.py CHANGED Viewed

@@ -27,6 +27,7 @@ from .._model_call import ModelCall
 from .._model_output import ModelOutput
 from .._openai import (
     OpenAIAsyncHttpxClient,
+    is_codex,
     is_computer_use_preview,
     is_gpt,
     is_o1,
@@ -88,8 +89,10 @@ class OpenAIAPI(ModelAPI):
         # is this a model we use responses api by default for?
         responses_model = (
-            self.is_o_series() and not self.is_o1_early()
-        ) or self.is_computer_use_preview()
+            (self.is_o_series() and not self.is_o1_early())
+            or self.is_computer_use_preview()
+            or self.is_codex()
+        )
         # resolve whether we are forcing the responses api
         self.responses_api = responses_api or responses_model
@@ -193,6 +196,9 @@ class OpenAIAPI(ModelAPI):
     def is_computer_use_preview(self) -> bool:
         return is_computer_use_preview(self.service_model_name())
+    def is_codex(self) -> bool:
+        return is_codex(self.service_model_name())
     def is_gpt(self) -> bool:
         return is_gpt(self.service_model_name())

{inspect_ai-0.3.101.dist-info → inspect_ai-0.3.102.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: inspect_ai
-Version: 0.3.101
+Version: 0.3.102
 Summary: Framework for large language model evaluations
 Author: UK AI Security Institute
 License: MIT License

{inspect_ai-0.3.101.dist-info → inspect_ai-0.3.102.dist-info}/RECORD RENAMED Viewed

@@ -3,7 +3,7 @@ inspect_ai/__main__.py,sha256=oWX4YwDZDg3GS3-IG0yPGoSEOfSzWihELg7QmrUlxjM,67
 inspect_ai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 inspect_ai/_cli/cache.py,sha256=RVGuBYwwk3c45JfyfcSFJ419etSsv3-Z7AyfQE-Hul0,3912
 inspect_ai/_cli/common.py,sha256=WbAgGbLcxABcWoEWiGCWSbkkxYr2jmL9i0cyseWHmRA,4165
-inspect_ai/_cli/eval.py,sha256=Y0ghetl7DaGY-CCYl0-iiVLAWjRNgtyFVtE7Yg6PDDA,39468
+inspect_ai/_cli/eval.py,sha256=zX1QcvPAi-Vjk8NQUVlG48sa3vowTXAjmv0kQogtGiw,39394
 inspect_ai/_cli/info.py,sha256=QMxaTG9TmzW95EiLrOgkzubvavoR-VHxo3eV7ppmrzI,1789
 inspect_ai/_cli/list.py,sha256=M8mazI8Zuq8Hp99YWKnxQd9UWx1Qi87zfXRzZYAAakk,2459
 inspect_ai/_cli/log.py,sha256=Ko4TLqoy1jD3DoL50EkcQMVc16BzkjYo8qrZBakC4e8,5818
@@ -45,12 +45,12 @@ inspect_ai/_display/textual/widgets/transcript.py,sha256=fmCJwe1EZ7bjeB6DXakQ2l3
 inspect_ai/_display/textual/widgets/vscode.py,sha256=SAIPO8VOkT_CFIfnCP_XxKixojdYXxMNdYU3Z2mq5Ek,1298
 inspect_ai/_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 inspect_ai/_eval/context.py,sha256=mdYinWG2lcYkWLieT42suzUDyaQBVHosbaWTKA6Uu48,1407
-inspect_ai/_eval/eval.py,sha256=4qOG1tulHZajfUX5RGFXVnrW5ajel3TdAwlvzAqJmKU,44161
-inspect_ai/_eval/evalset.py,sha256=V_nE7xzKzxlEqC-AZwqbYZ9FWzEfQM0oiLXb1zRZAgw,25073
+inspect_ai/_eval/eval.py,sha256=mj7Blv-bsd1qgswWWSbA1rphiyQpLEZIxmbQ1hAExC0,43555
+inspect_ai/_eval/evalset.py,sha256=qzv0VFqHEp_9E2G3IwNRLqYD6gdKK3XzAwPJGbiABhU,25039
 inspect_ai/_eval/list.py,sha256=VbZ-2EI6MqrXvCN7VTz21TQSoU5K5_Q0hqhxmj5A_m0,3744
 inspect_ai/_eval/loader.py,sha256=dafv4TlQDqdvzPyrQrBsNiCzhvqjwmcVQzweX-AL1os,24805
 inspect_ai/_eval/registry.py,sha256=8Cm-qyDB6Fthea8DUe-QES9plly_Pf2MUuCgeNQ3fOY,5303
-inspect_ai/_eval/run.py,sha256=W3dMWHVHTyx_eqE6wV0bjDWol9ZQUON1oQtZHZ2HK6E,21299
+inspect_ai/_eval/run.py,sha256=PYP3zi5OjPguBkMNS6Nq4KrbEBiSH7ebdBsKHpjYhOE,21231
 inspect_ai/_eval/score.py,sha256=KodaNhMCE1KV8qS33zj7Q8I0LD080WRCb32tyg1956w,10443
 inspect_ai/_eval/task/__init__.py,sha256=6FvojMW3yo36L7xDacppCHDxt6A8_tzj_ftg5bQ6eNk,199
 inspect_ai/_eval/task/constants.py,sha256=quAKMw-4-3xKd1T_KwXCZvHYoKRXt1ZGuaHbBcWJwnA,72
@@ -58,7 +58,7 @@ inspect_ai/_eval/task/epochs.py,sha256=Ci7T6CQniSOTChv5Im2dCdSDrP-5hq19rV6iJ2uBc
 inspect_ai/_eval/task/error.py,sha256=Vhqinfdf0eIrjn7kUY7-id8Kbdggr-fEFpAJeJrkJ1M,1244
 inspect_ai/_eval/task/generate.py,sha256=yzeGlRUgIut-3OvF0xyx1ZjuJS61nR7thHV8tqv_aTE,2146
 inspect_ai/_eval/task/images.py,sha256=nTzHizlyuPYumPH7gAOBSrNkTwTbAmZ7tKdzN7d_R2k,4035
-inspect_ai/_eval/task/log.py,sha256=O7cS-QHlJiV8JV-7G4-Eu06qYXAYf-GCCqBOSFikJ30,11919
+inspect_ai/_eval/task/log.py,sha256=g5KACnK_QuOocjhhYgeWsSx6n9cahuX4l6hiYET25qE,11835
 inspect_ai/_eval/task/resolved.py,sha256=LBVHEeq9N1fkRObmA2pnDE_l_EuH6n2Dg8-c8yCGT5U,1007
 inspect_ai/_eval/task/results.py,sha256=x4weYRK2XGowfBG3f2msOeZQ_pxh230HTlw6kps33jw,17925
 inspect_ai/_eval/task/run.py,sha256=VdqQnHqP_fWog_Re3L-kxN8MRAU41tU9xqfFILvNN7E,39120
@@ -591,10 +591,10 @@ inspect_ai/log/_tree.py,sha256=C817m_7-66ThyCX5K4nVA7AzYOgLXWlKMdTQ-ueNA-U,3232
 inspect_ai/log/_util.py,sha256=j7jeqDendiCt12U_iaPQj8fLgTA44pk04ZM1tGQdau4,1699
 inspect_ai/log/_recorders/__init__.py,sha256=qMm2y1HOzS499ZTXHOQExSN8PJ-I3LnH35icbP2m4VU,412
 inspect_ai/log/_recorders/create.py,sha256=WB-fms0dBDHlTtTa_a_r0fFc6UPRvQZKZT7d_Inp-EU,1103
-inspect_ai/log/_recorders/eval.py,sha256=4ZsidUM9td3evoEalPkVZCZYajQhLGRk9Qu3SNLQU4Q,16062
+inspect_ai/log/_recorders/eval.py,sha256=tPO0Jh56d4VOPjjqCl1mHEQEkpdfhcC2rvA6BAdWedY,15972
 inspect_ai/log/_recorders/file.py,sha256=aY1aGOwWfoXUD7c_imrcN9rqCFA-6xEjAef0HIPOaBM,3707
-inspect_ai/log/_recorders/json.py,sha256=0CxvvlyeUQQph0fsRpgHnfooyCt7E_xnj58s2yeNp_w,8817
-inspect_ai/log/_recorders/recorder.py,sha256=yXv8DXjegZxfKirPZOnfJQoXsl-AKJRL0e9r7S8Jo4s,1822
+inspect_ai/log/_recorders/json.py,sha256=sF4cc-_TrUgRxngHwj0p7Rsjy67XTbWSH_SRCEqz9RQ,8782
+inspect_ai/log/_recorders/recorder.py,sha256=Fn4lrfUJmxIhxy3hhQzpDjVABtCXQ7wtT0vRykzQ0YA,1787
 inspect_ai/log/_recorders/types.py,sha256=cV7eFpXsbmXVw4r7xw1RdJC6Jltvw9xLXYPbFsYW7Oc,159
 inspect_ai/log/_recorders/buffer/__init__.py,sha256=6DsRdnNl-ic-xJmnBE5i45ZP3eB4yAta9wxi5WFcbqc,367
 inspect_ai/log/_recorders/buffer/buffer.py,sha256=rtLvaX7nSqNrWb-3CeSaOHwJgF1CzRgXFT_I1dDkM1k,945
@@ -611,7 +611,7 @@ inspect_ai/model/_generate_config.py,sha256=17QzzPlLvAxmC7uOPAikTaJoNecvZn_7xTgX
 inspect_ai/model/_model.py,sha256=DOM65CcUcVi6np0fctSOW3RnroBZHeVgIK2e8tg-EzQ,53101
 inspect_ai/model/_model_call.py,sha256=VJ8wnl9Y81JaiClBYM8eyt1jVb3n-yc6Dd88ofRiJDc,2234
 inspect_ai/model/_model_output.py,sha256=1CLAt0JKsv9NYbN93-i5Fl0K035cxoO3bP_q19234Y8,8812
-inspect_ai/model/_openai.py,sha256=xZCe11EVbVV4-QOy33S_L0sQ_sobJtCO4qD3oz5XNdw,24467
+inspect_ai/model/_openai.py,sha256=iJV716nI_3boHEHQa_gMj6_Ri3eu5bshqTcU1jWpn4Y,24529
 inspect_ai/model/_openai_computer_use.py,sha256=vbKkYLhqNuX16zuWfg5MaGp9H8URrPcLhKQ1pDsZtPo,5943
 inspect_ai/model/_openai_responses.py,sha256=cychfKxyxxWltd_kDiX1oCfN6hvBH-A1_muxRwvGwvg,23854
 inspect_ai/model/_openai_web_search.py,sha256=tKxoRHc8gHIt8kgIdp6eM_Ak62inGRTxdUe1JNJV0b8,1195
@@ -632,7 +632,7 @@ inspect_ai/model/_providers/mistral.py,sha256=RLJ0ymOHxDWyVJfF-7UeskkjPg7DefBipR
 inspect_ai/model/_providers/mockllm.py,sha256=gL9f-f5TOdE4a0GVENr3cOIIp2kv8zVXWPZ608rouGk,2440
 inspect_ai/model/_providers/none.py,sha256=6qLbZpHSoEZaaxFO7luieFjqig2Ju8Fu00DlRngAry8,935
 inspect_ai/model/_providers/ollama.py,sha256=eWhsVoZ8k_3zYpdDoZWcPN9rGly6Ha4O49N99EJfycs,591
-inspect_ai/model/_providers/openai.py,sha256=2TXHXFlrUXO5Sv0SNt8UDL41Kvl-b2C8AmoSaZqigsk,13616
+inspect_ai/model/_providers/openai.py,sha256=WKf_54AIl6poQKaSv3QbLTQWDsNG1H9vYKfR4NTCQUY,13759
 inspect_ai/model/_providers/openai_compatible.py,sha256=OO7VoZA8RQb6y6iaRXMHabCOG4wWQfhsxJLD5wB38co,6592
 inspect_ai/model/_providers/openai_o1.py,sha256=ahdXt2TFtPTdDvSGVQw7EaVindfbFbY2pLZrrB45rFg,13305
 inspect_ai/model/_providers/openai_responses.py,sha256=hHnx5fgok_doLHmLVrfib6VYkrqUezwD7HlkIrCaKqY,6764
@@ -758,9 +758,9 @@ inspect_ai/util/_sandbox/docker/internal.py,sha256=c8X8TLrBPOvsfnq5TkMlb_bzTALyc
 inspect_ai/util/_sandbox/docker/prereqs.py,sha256=0j6_OauBBnVlpBleADcZavIAAQZy4WewVjbRn9c0stg,3355
 inspect_ai/util/_sandbox/docker/service.py,sha256=hhHIWH1VDFLwehdGd19aUBD_VKfDO3GCPxpw1HSwVQk,2437
 inspect_ai/util/_sandbox/docker/util.py,sha256=EeInihCNXgUWxaqZ4dNOJd719kXL2_jr63QCoXn68vA,3154
-inspect_ai-0.3.101.dist-info/licenses/LICENSE,sha256=xZPCr8gTiFIerrA_DRpLAbw-UUftnLFsHxKeW-NTtq8,1081
-inspect_ai-0.3.101.dist-info/METADATA,sha256=KURxYFFPkyHjrFmLojh4js5ZrmOp61P1ARfGOGi4Lhg,5460
-inspect_ai-0.3.101.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-inspect_ai-0.3.101.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
-inspect_ai-0.3.101.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
-inspect_ai-0.3.101.dist-info/RECORD,,
+inspect_ai-0.3.102.dist-info/licenses/LICENSE,sha256=xZPCr8gTiFIerrA_DRpLAbw-UUftnLFsHxKeW-NTtq8,1081
+inspect_ai-0.3.102.dist-info/METADATA,sha256=oeFq80JBy8nfGBlfCzuBmSlBfy4ybvQ1z-E1WB1ItY0,5460
+inspect_ai-0.3.102.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+inspect_ai-0.3.102.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
+inspect_ai-0.3.102.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
+inspect_ai-0.3.102.dist-info/RECORD,,

{inspect_ai-0.3.101.dist-info → inspect_ai-0.3.102.dist-info}/WHEEL RENAMED Viewed

File without changes

{inspect_ai-0.3.101.dist-info → inspect_ai-0.3.102.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{inspect_ai-0.3.101.dist-info → inspect_ai-0.3.102.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{inspect_ai-0.3.101.dist-info → inspect_ai-0.3.102.dist-info}/top_level.txt RENAMED Viewed

File without changes

inspect-ai 0.3.101__py3-none-any.whl → 0.3.102__py3-none-any.whl

inspect-ai 0.3.101__py3-none-any.whl → 0.3.102__py3-none-any.whl