inspect-ai 0.3.101__py3-none-any.whl → 0.3.102__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
inspect_ai/_cli/eval.py CHANGED
@@ -949,7 +949,6 @@ def eval_exec(
949
949
  log_images=log_images,
950
950
  log_buffer=log_buffer,
951
951
  log_shared=log_shared,
952
- log_header_only=True, # cli invocation doesn't need full log
953
952
  score=score,
954
953
  score_display=score_display,
955
954
  )
inspect_ai/_eval/eval.py CHANGED
@@ -105,7 +105,6 @@ def eval(
105
105
  log_images: bool | None = None,
106
106
  log_buffer: int | None = None,
107
107
  log_shared: bool | int | None = None,
108
- log_header_only: bool | None = None,
109
108
  score: bool = True,
110
109
  score_display: bool | None = None,
111
110
  **kwargs: Unpack[GenerateConfigArgs],
@@ -182,8 +181,6 @@ def eval(
182
181
  log_shared: Sync sample events to log directory so that users on other systems
183
182
  can see log updates in realtime (defaults to no syncing). Specify `True`
184
183
  to sync every 10 seconds, otherwise an integer to sync every `n` seconds.
185
- log_header_only: If `True`, the function should return only log headers rather
186
- than full logs with samples (defaults to `False`).
187
184
  score: Score output (defaults to True)
188
185
  score_display: Show scoring metrics in realtime (defaults to True)
189
186
  **kwargs: Model generation options.
@@ -237,7 +234,6 @@ def eval(
237
234
  log_images=log_images,
238
235
  log_buffer=log_buffer,
239
236
  log_shared=log_shared,
240
- log_header_only=log_header_only,
241
237
  score=score,
242
238
  score_display=score_display,
243
239
  **kwargs,
@@ -292,7 +288,6 @@ async def eval_async(
292
288
  log_images: bool | None = None,
293
289
  log_buffer: int | None = None,
294
290
  log_shared: bool | int | None = None,
295
- log_header_only: bool | None = None,
296
291
  score: bool = True,
297
292
  score_display: bool | None = None,
298
293
  **kwargs: Unpack[GenerateConfigArgs],
@@ -349,9 +344,7 @@ async def eval_async(
349
344
  log_buffer: Number of samples to buffer before writing log file.
350
345
  If not specified, an appropriate default for the format and filesystem is
351
346
  chosen (10 for most all cases, 100 for JSON logs on remote filesystems).
352
- log_shared: Indicate that the log directory is shared, which results in additional
353
- syncing of realtime log data for Inspect View.
354
- log_header_only: If `True`, the function should return only log headers rather than full logs with samples (defaults to `False`).
347
+ log_shared: Indicate that the log directory is shared, which results in additional syncing of realtime log data for Inspect View.
355
348
  score: Score output (defaults to True)
356
349
  score_display: Show scoring metrics in realtime (defaults to True)
357
350
  **kwargs: Model generation options.
@@ -439,9 +432,6 @@ async def eval_async(
439
432
  # resolve log_shared
440
433
  log_shared = DEFAULT_LOG_SHARED if log_shared is True else log_shared
441
434
 
442
- # resolve header only
443
- log_header_only = log_header_only is True
444
-
445
435
  # validate that --log-shared can't use used with 'json' format
446
436
  if log_shared and log_format == JSON_LOG_FORMAT:
447
437
  raise PrerequisiteError(
@@ -517,7 +507,6 @@ async def eval_async(
517
507
  eval_config=eval_config,
518
508
  eval_sandbox=sandbox,
519
509
  recorder=recorder,
520
- header_only=log_header_only,
521
510
  epochs_reducer=epochs_reducer,
522
511
  solver=solver,
523
512
  tags=tags,
@@ -543,7 +532,6 @@ async def eval_async(
543
532
  eval_config=eval_config,
544
533
  eval_sandbox=sandbox,
545
534
  recorder=recorder,
546
- header_only=log_header_only,
547
535
  epochs_reducer=epochs_reducer,
548
536
  solver=solver,
549
537
  tags=tags,
@@ -235,7 +235,6 @@ def eval_set(
235
235
  log_images=log_images,
236
236
  log_buffer=log_buffer,
237
237
  log_shared=log_shared,
238
- log_header_only=True,
239
238
  score=score,
240
239
  **kwargs,
241
240
  )
inspect_ai/_eval/run.py CHANGED
@@ -63,7 +63,6 @@ async def eval_run(
63
63
  eval_config: EvalConfig,
64
64
  eval_sandbox: SandboxEnvironmentType | None,
65
65
  recorder: Recorder,
66
- header_only: bool,
67
66
  epochs_reducer: list[ScoreReducer] | None = None,
68
67
  solver: Solver | SolverSpec | None = None,
69
68
  tags: list[str] | None = None,
@@ -213,7 +212,6 @@ async def eval_run(
213
212
  eval_config=task_eval_config,
214
213
  metadata=((metadata or {}) | (task.metadata or {})) or None,
215
214
  recorder=recorder,
216
- header_only=header_only,
217
215
  )
218
216
  await logger.init()
219
217
 
@@ -75,7 +75,6 @@ class TaskLogger:
75
75
  eval_config: EvalConfig,
76
76
  metadata: dict[str, Any] | None,
77
77
  recorder: Recorder,
78
- header_only: bool,
79
78
  ) -> None:
80
79
  # determine versions
81
80
  git = git_context()
@@ -154,7 +153,6 @@ class TaskLogger:
154
153
 
155
154
  # stack recorder and location
156
155
  self.recorder = recorder
157
- self.header_only = header_only
158
156
 
159
157
  # number of samples logged
160
158
  self._samples_completed = 0
@@ -240,7 +238,7 @@ class TaskLogger:
240
238
  ) -> EvalLog:
241
239
  # finish and get log
242
240
  log = await self.recorder.log_finish(
243
- self.eval, status, stats, results, reductions, error, self.header_only
241
+ self.eval, status, stats, results, reductions, error
244
242
  )
245
243
 
246
244
  # cleanup the events db
@@ -133,7 +133,6 @@ class EvalRecorder(FileRecorder):
133
133
  results: EvalResults | None,
134
134
  reductions: list[EvalSampleReductions] | None,
135
135
  error: EvalError | None = None,
136
- header_only: bool = False,
137
136
  ) -> EvalLog:
138
137
  # get the key and log
139
138
  key = self._log_file_key(eval)
@@ -175,7 +174,7 @@ class EvalRecorder(FileRecorder):
175
174
 
176
175
  # flush and write the results
177
176
  await log.flush()
178
- return await log.close(header_only)
177
+ return await log.close()
179
178
 
180
179
  @classmethod
181
180
  @override
@@ -322,12 +321,12 @@ class ZipLogFile:
322
321
  # re-open zip file w/ self.temp_file pointer at end
323
322
  self._open()
324
323
 
325
- async def close(self, header_only: bool) -> EvalLog:
324
+ async def close(self) -> EvalLog:
326
325
  async with self._lock:
327
326
  # read the log from the temp file then close it
328
327
  try:
329
328
  self._temp_file.seek(0)
330
- return _read_log(self._temp_file, self._file, header_only=header_only)
329
+ return _read_log(self._temp_file, self._file)
331
330
  finally:
332
331
  self._temp_file.close()
333
332
  if self._zip:
@@ -96,7 +96,6 @@ class JSONRecorder(FileRecorder):
96
96
  results: EvalResults | None,
97
97
  reductions: list[EvalSampleReductions] | None,
98
98
  error: EvalError | None = None,
99
- header_only: bool = False,
100
99
  ) -> EvalLog:
101
100
  log = self.data[self._log_file_key(spec)]
102
101
  log.data.status = status
@@ -46,7 +46,6 @@ class Recorder(abc.ABC):
46
46
  results: EvalResults | None,
47
47
  reductions: list[EvalSampleReductions] | None,
48
48
  error: EvalError | None = None,
49
- header_only: bool = False,
50
49
  ) -> EvalLog: ...
51
50
 
52
51
  @classmethod
@@ -98,6 +98,10 @@ def is_computer_use_preview(name: str) -> bool:
98
98
  return "computer-use-preview" in name
99
99
 
100
100
 
101
+ def is_codex(name: str) -> bool:
102
+ return "codex" in name
103
+
104
+
101
105
  def is_gpt(name: str) -> bool:
102
106
  return "gpt" in name
103
107
 
@@ -27,6 +27,7 @@ from .._model_call import ModelCall
27
27
  from .._model_output import ModelOutput
28
28
  from .._openai import (
29
29
  OpenAIAsyncHttpxClient,
30
+ is_codex,
30
31
  is_computer_use_preview,
31
32
  is_gpt,
32
33
  is_o1,
@@ -88,8 +89,10 @@ class OpenAIAPI(ModelAPI):
88
89
 
89
90
  # is this a model we use responses api by default for?
90
91
  responses_model = (
91
- self.is_o_series() and not self.is_o1_early()
92
- ) or self.is_computer_use_preview()
92
+ (self.is_o_series() and not self.is_o1_early())
93
+ or self.is_computer_use_preview()
94
+ or self.is_codex()
95
+ )
93
96
 
94
97
  # resolve whether we are forcing the responses api
95
98
  self.responses_api = responses_api or responses_model
@@ -193,6 +196,9 @@ class OpenAIAPI(ModelAPI):
193
196
  def is_computer_use_preview(self) -> bool:
194
197
  return is_computer_use_preview(self.service_model_name())
195
198
 
199
+ def is_codex(self) -> bool:
200
+ return is_codex(self.service_model_name())
201
+
196
202
  def is_gpt(self) -> bool:
197
203
  return is_gpt(self.service_model_name())
198
204
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: inspect_ai
3
- Version: 0.3.101
3
+ Version: 0.3.102
4
4
  Summary: Framework for large language model evaluations
5
5
  Author: UK AI Security Institute
6
6
  License: MIT License
@@ -3,7 +3,7 @@ inspect_ai/__main__.py,sha256=oWX4YwDZDg3GS3-IG0yPGoSEOfSzWihELg7QmrUlxjM,67
3
3
  inspect_ai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  inspect_ai/_cli/cache.py,sha256=RVGuBYwwk3c45JfyfcSFJ419etSsv3-Z7AyfQE-Hul0,3912
5
5
  inspect_ai/_cli/common.py,sha256=WbAgGbLcxABcWoEWiGCWSbkkxYr2jmL9i0cyseWHmRA,4165
6
- inspect_ai/_cli/eval.py,sha256=Y0ghetl7DaGY-CCYl0-iiVLAWjRNgtyFVtE7Yg6PDDA,39468
6
+ inspect_ai/_cli/eval.py,sha256=zX1QcvPAi-Vjk8NQUVlG48sa3vowTXAjmv0kQogtGiw,39394
7
7
  inspect_ai/_cli/info.py,sha256=QMxaTG9TmzW95EiLrOgkzubvavoR-VHxo3eV7ppmrzI,1789
8
8
  inspect_ai/_cli/list.py,sha256=M8mazI8Zuq8Hp99YWKnxQd9UWx1Qi87zfXRzZYAAakk,2459
9
9
  inspect_ai/_cli/log.py,sha256=Ko4TLqoy1jD3DoL50EkcQMVc16BzkjYo8qrZBakC4e8,5818
@@ -45,12 +45,12 @@ inspect_ai/_display/textual/widgets/transcript.py,sha256=fmCJwe1EZ7bjeB6DXakQ2l3
45
45
  inspect_ai/_display/textual/widgets/vscode.py,sha256=SAIPO8VOkT_CFIfnCP_XxKixojdYXxMNdYU3Z2mq5Ek,1298
46
46
  inspect_ai/_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
47
  inspect_ai/_eval/context.py,sha256=mdYinWG2lcYkWLieT42suzUDyaQBVHosbaWTKA6Uu48,1407
48
- inspect_ai/_eval/eval.py,sha256=4qOG1tulHZajfUX5RGFXVnrW5ajel3TdAwlvzAqJmKU,44161
49
- inspect_ai/_eval/evalset.py,sha256=V_nE7xzKzxlEqC-AZwqbYZ9FWzEfQM0oiLXb1zRZAgw,25073
48
+ inspect_ai/_eval/eval.py,sha256=mj7Blv-bsd1qgswWWSbA1rphiyQpLEZIxmbQ1hAExC0,43555
49
+ inspect_ai/_eval/evalset.py,sha256=qzv0VFqHEp_9E2G3IwNRLqYD6gdKK3XzAwPJGbiABhU,25039
50
50
  inspect_ai/_eval/list.py,sha256=VbZ-2EI6MqrXvCN7VTz21TQSoU5K5_Q0hqhxmj5A_m0,3744
51
51
  inspect_ai/_eval/loader.py,sha256=dafv4TlQDqdvzPyrQrBsNiCzhvqjwmcVQzweX-AL1os,24805
52
52
  inspect_ai/_eval/registry.py,sha256=8Cm-qyDB6Fthea8DUe-QES9plly_Pf2MUuCgeNQ3fOY,5303
53
- inspect_ai/_eval/run.py,sha256=W3dMWHVHTyx_eqE6wV0bjDWol9ZQUON1oQtZHZ2HK6E,21299
53
+ inspect_ai/_eval/run.py,sha256=PYP3zi5OjPguBkMNS6Nq4KrbEBiSH7ebdBsKHpjYhOE,21231
54
54
  inspect_ai/_eval/score.py,sha256=KodaNhMCE1KV8qS33zj7Q8I0LD080WRCb32tyg1956w,10443
55
55
  inspect_ai/_eval/task/__init__.py,sha256=6FvojMW3yo36L7xDacppCHDxt6A8_tzj_ftg5bQ6eNk,199
56
56
  inspect_ai/_eval/task/constants.py,sha256=quAKMw-4-3xKd1T_KwXCZvHYoKRXt1ZGuaHbBcWJwnA,72
@@ -58,7 +58,7 @@ inspect_ai/_eval/task/epochs.py,sha256=Ci7T6CQniSOTChv5Im2dCdSDrP-5hq19rV6iJ2uBc
58
58
  inspect_ai/_eval/task/error.py,sha256=Vhqinfdf0eIrjn7kUY7-id8Kbdggr-fEFpAJeJrkJ1M,1244
59
59
  inspect_ai/_eval/task/generate.py,sha256=yzeGlRUgIut-3OvF0xyx1ZjuJS61nR7thHV8tqv_aTE,2146
60
60
  inspect_ai/_eval/task/images.py,sha256=nTzHizlyuPYumPH7gAOBSrNkTwTbAmZ7tKdzN7d_R2k,4035
61
- inspect_ai/_eval/task/log.py,sha256=O7cS-QHlJiV8JV-7G4-Eu06qYXAYf-GCCqBOSFikJ30,11919
61
+ inspect_ai/_eval/task/log.py,sha256=g5KACnK_QuOocjhhYgeWsSx6n9cahuX4l6hiYET25qE,11835
62
62
  inspect_ai/_eval/task/resolved.py,sha256=LBVHEeq9N1fkRObmA2pnDE_l_EuH6n2Dg8-c8yCGT5U,1007
63
63
  inspect_ai/_eval/task/results.py,sha256=x4weYRK2XGowfBG3f2msOeZQ_pxh230HTlw6kps33jw,17925
64
64
  inspect_ai/_eval/task/run.py,sha256=VdqQnHqP_fWog_Re3L-kxN8MRAU41tU9xqfFILvNN7E,39120
@@ -591,10 +591,10 @@ inspect_ai/log/_tree.py,sha256=C817m_7-66ThyCX5K4nVA7AzYOgLXWlKMdTQ-ueNA-U,3232
591
591
  inspect_ai/log/_util.py,sha256=j7jeqDendiCt12U_iaPQj8fLgTA44pk04ZM1tGQdau4,1699
592
592
  inspect_ai/log/_recorders/__init__.py,sha256=qMm2y1HOzS499ZTXHOQExSN8PJ-I3LnH35icbP2m4VU,412
593
593
  inspect_ai/log/_recorders/create.py,sha256=WB-fms0dBDHlTtTa_a_r0fFc6UPRvQZKZT7d_Inp-EU,1103
594
- inspect_ai/log/_recorders/eval.py,sha256=4ZsidUM9td3evoEalPkVZCZYajQhLGRk9Qu3SNLQU4Q,16062
594
+ inspect_ai/log/_recorders/eval.py,sha256=tPO0Jh56d4VOPjjqCl1mHEQEkpdfhcC2rvA6BAdWedY,15972
595
595
  inspect_ai/log/_recorders/file.py,sha256=aY1aGOwWfoXUD7c_imrcN9rqCFA-6xEjAef0HIPOaBM,3707
596
- inspect_ai/log/_recorders/json.py,sha256=0CxvvlyeUQQph0fsRpgHnfooyCt7E_xnj58s2yeNp_w,8817
597
- inspect_ai/log/_recorders/recorder.py,sha256=yXv8DXjegZxfKirPZOnfJQoXsl-AKJRL0e9r7S8Jo4s,1822
596
+ inspect_ai/log/_recorders/json.py,sha256=sF4cc-_TrUgRxngHwj0p7Rsjy67XTbWSH_SRCEqz9RQ,8782
597
+ inspect_ai/log/_recorders/recorder.py,sha256=Fn4lrfUJmxIhxy3hhQzpDjVABtCXQ7wtT0vRykzQ0YA,1787
598
598
  inspect_ai/log/_recorders/types.py,sha256=cV7eFpXsbmXVw4r7xw1RdJC6Jltvw9xLXYPbFsYW7Oc,159
599
599
  inspect_ai/log/_recorders/buffer/__init__.py,sha256=6DsRdnNl-ic-xJmnBE5i45ZP3eB4yAta9wxi5WFcbqc,367
600
600
  inspect_ai/log/_recorders/buffer/buffer.py,sha256=rtLvaX7nSqNrWb-3CeSaOHwJgF1CzRgXFT_I1dDkM1k,945
@@ -611,7 +611,7 @@ inspect_ai/model/_generate_config.py,sha256=17QzzPlLvAxmC7uOPAikTaJoNecvZn_7xTgX
611
611
  inspect_ai/model/_model.py,sha256=DOM65CcUcVi6np0fctSOW3RnroBZHeVgIK2e8tg-EzQ,53101
612
612
  inspect_ai/model/_model_call.py,sha256=VJ8wnl9Y81JaiClBYM8eyt1jVb3n-yc6Dd88ofRiJDc,2234
613
613
  inspect_ai/model/_model_output.py,sha256=1CLAt0JKsv9NYbN93-i5Fl0K035cxoO3bP_q19234Y8,8812
614
- inspect_ai/model/_openai.py,sha256=xZCe11EVbVV4-QOy33S_L0sQ_sobJtCO4qD3oz5XNdw,24467
614
+ inspect_ai/model/_openai.py,sha256=iJV716nI_3boHEHQa_gMj6_Ri3eu5bshqTcU1jWpn4Y,24529
615
615
  inspect_ai/model/_openai_computer_use.py,sha256=vbKkYLhqNuX16zuWfg5MaGp9H8URrPcLhKQ1pDsZtPo,5943
616
616
  inspect_ai/model/_openai_responses.py,sha256=cychfKxyxxWltd_kDiX1oCfN6hvBH-A1_muxRwvGwvg,23854
617
617
  inspect_ai/model/_openai_web_search.py,sha256=tKxoRHc8gHIt8kgIdp6eM_Ak62inGRTxdUe1JNJV0b8,1195
@@ -632,7 +632,7 @@ inspect_ai/model/_providers/mistral.py,sha256=RLJ0ymOHxDWyVJfF-7UeskkjPg7DefBipR
632
632
  inspect_ai/model/_providers/mockllm.py,sha256=gL9f-f5TOdE4a0GVENr3cOIIp2kv8zVXWPZ608rouGk,2440
633
633
  inspect_ai/model/_providers/none.py,sha256=6qLbZpHSoEZaaxFO7luieFjqig2Ju8Fu00DlRngAry8,935
634
634
  inspect_ai/model/_providers/ollama.py,sha256=eWhsVoZ8k_3zYpdDoZWcPN9rGly6Ha4O49N99EJfycs,591
635
- inspect_ai/model/_providers/openai.py,sha256=2TXHXFlrUXO5Sv0SNt8UDL41Kvl-b2C8AmoSaZqigsk,13616
635
+ inspect_ai/model/_providers/openai.py,sha256=WKf_54AIl6poQKaSv3QbLTQWDsNG1H9vYKfR4NTCQUY,13759
636
636
  inspect_ai/model/_providers/openai_compatible.py,sha256=OO7VoZA8RQb6y6iaRXMHabCOG4wWQfhsxJLD5wB38co,6592
637
637
  inspect_ai/model/_providers/openai_o1.py,sha256=ahdXt2TFtPTdDvSGVQw7EaVindfbFbY2pLZrrB45rFg,13305
638
638
  inspect_ai/model/_providers/openai_responses.py,sha256=hHnx5fgok_doLHmLVrfib6VYkrqUezwD7HlkIrCaKqY,6764
@@ -758,9 +758,9 @@ inspect_ai/util/_sandbox/docker/internal.py,sha256=c8X8TLrBPOvsfnq5TkMlb_bzTALyc
758
758
  inspect_ai/util/_sandbox/docker/prereqs.py,sha256=0j6_OauBBnVlpBleADcZavIAAQZy4WewVjbRn9c0stg,3355
759
759
  inspect_ai/util/_sandbox/docker/service.py,sha256=hhHIWH1VDFLwehdGd19aUBD_VKfDO3GCPxpw1HSwVQk,2437
760
760
  inspect_ai/util/_sandbox/docker/util.py,sha256=EeInihCNXgUWxaqZ4dNOJd719kXL2_jr63QCoXn68vA,3154
761
- inspect_ai-0.3.101.dist-info/licenses/LICENSE,sha256=xZPCr8gTiFIerrA_DRpLAbw-UUftnLFsHxKeW-NTtq8,1081
762
- inspect_ai-0.3.101.dist-info/METADATA,sha256=KURxYFFPkyHjrFmLojh4js5ZrmOp61P1ARfGOGi4Lhg,5460
763
- inspect_ai-0.3.101.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
764
- inspect_ai-0.3.101.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
765
- inspect_ai-0.3.101.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
766
- inspect_ai-0.3.101.dist-info/RECORD,,
761
+ inspect_ai-0.3.102.dist-info/licenses/LICENSE,sha256=xZPCr8gTiFIerrA_DRpLAbw-UUftnLFsHxKeW-NTtq8,1081
762
+ inspect_ai-0.3.102.dist-info/METADATA,sha256=oeFq80JBy8nfGBlfCzuBmSlBfy4ybvQ1z-E1WB1ItY0,5460
763
+ inspect_ai-0.3.102.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
764
+ inspect_ai-0.3.102.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
765
+ inspect_ai-0.3.102.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
766
+ inspect_ai-0.3.102.dist-info/RECORD,,