inspect-ai 0.3.101__py3-none-any.whl → 0.3.102__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +0 -1
- inspect_ai/_eval/eval.py +1 -13
- inspect_ai/_eval/evalset.py +0 -1
- inspect_ai/_eval/run.py +0 -2
- inspect_ai/_eval/task/log.py +1 -3
- inspect_ai/log/_recorders/eval.py +3 -4
- inspect_ai/log/_recorders/json.py +0 -1
- inspect_ai/log/_recorders/recorder.py +0 -1
- inspect_ai/model/_openai.py +4 -0
- inspect_ai/model/_providers/openai.py +8 -2
- {inspect_ai-0.3.101.dist-info → inspect_ai-0.3.102.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.101.dist-info → inspect_ai-0.3.102.dist-info}/RECORD +16 -16
- {inspect_ai-0.3.101.dist-info → inspect_ai-0.3.102.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.101.dist-info → inspect_ai-0.3.102.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.101.dist-info → inspect_ai-0.3.102.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.101.dist-info → inspect_ai-0.3.102.dist-info}/top_level.txt +0 -0
inspect_ai/_cli/eval.py
CHANGED
inspect_ai/_eval/eval.py
CHANGED
@@ -105,7 +105,6 @@ def eval(
|
|
105
105
|
log_images: bool | None = None,
|
106
106
|
log_buffer: int | None = None,
|
107
107
|
log_shared: bool | int | None = None,
|
108
|
-
log_header_only: bool | None = None,
|
109
108
|
score: bool = True,
|
110
109
|
score_display: bool | None = None,
|
111
110
|
**kwargs: Unpack[GenerateConfigArgs],
|
@@ -182,8 +181,6 @@ def eval(
|
|
182
181
|
log_shared: Sync sample events to log directory so that users on other systems
|
183
182
|
can see log updates in realtime (defaults to no syncing). Specify `True`
|
184
183
|
to sync every 10 seconds, otherwise an integer to sync every `n` seconds.
|
185
|
-
log_header_only: If `True`, the function should return only log headers rather
|
186
|
-
than full logs with samples (defaults to `False`).
|
187
184
|
score: Score output (defaults to True)
|
188
185
|
score_display: Show scoring metrics in realtime (defaults to True)
|
189
186
|
**kwargs: Model generation options.
|
@@ -237,7 +234,6 @@ def eval(
|
|
237
234
|
log_images=log_images,
|
238
235
|
log_buffer=log_buffer,
|
239
236
|
log_shared=log_shared,
|
240
|
-
log_header_only=log_header_only,
|
241
237
|
score=score,
|
242
238
|
score_display=score_display,
|
243
239
|
**kwargs,
|
@@ -292,7 +288,6 @@ async def eval_async(
|
|
292
288
|
log_images: bool | None = None,
|
293
289
|
log_buffer: int | None = None,
|
294
290
|
log_shared: bool | int | None = None,
|
295
|
-
log_header_only: bool | None = None,
|
296
291
|
score: bool = True,
|
297
292
|
score_display: bool | None = None,
|
298
293
|
**kwargs: Unpack[GenerateConfigArgs],
|
@@ -349,9 +344,7 @@ async def eval_async(
|
|
349
344
|
log_buffer: Number of samples to buffer before writing log file.
|
350
345
|
If not specified, an appropriate default for the format and filesystem is
|
351
346
|
chosen (10 for most all cases, 100 for JSON logs on remote filesystems).
|
352
|
-
log_shared: Indicate that the log directory is shared, which results in additional
|
353
|
-
syncing of realtime log data for Inspect View.
|
354
|
-
log_header_only: If `True`, the function should return only log headers rather than full logs with samples (defaults to `False`).
|
347
|
+
log_shared: Indicate that the log directory is shared, which results in additional syncing of realtime log data for Inspect View.
|
355
348
|
score: Score output (defaults to True)
|
356
349
|
score_display: Show scoring metrics in realtime (defaults to True)
|
357
350
|
**kwargs: Model generation options.
|
@@ -439,9 +432,6 @@ async def eval_async(
|
|
439
432
|
# resolve log_shared
|
440
433
|
log_shared = DEFAULT_LOG_SHARED if log_shared is True else log_shared
|
441
434
|
|
442
|
-
# resolve header only
|
443
|
-
log_header_only = log_header_only is True
|
444
|
-
|
445
435
|
# validate that --log-shared can't use used with 'json' format
|
446
436
|
if log_shared and log_format == JSON_LOG_FORMAT:
|
447
437
|
raise PrerequisiteError(
|
@@ -517,7 +507,6 @@ async def eval_async(
|
|
517
507
|
eval_config=eval_config,
|
518
508
|
eval_sandbox=sandbox,
|
519
509
|
recorder=recorder,
|
520
|
-
header_only=log_header_only,
|
521
510
|
epochs_reducer=epochs_reducer,
|
522
511
|
solver=solver,
|
523
512
|
tags=tags,
|
@@ -543,7 +532,6 @@ async def eval_async(
|
|
543
532
|
eval_config=eval_config,
|
544
533
|
eval_sandbox=sandbox,
|
545
534
|
recorder=recorder,
|
546
|
-
header_only=log_header_only,
|
547
535
|
epochs_reducer=epochs_reducer,
|
548
536
|
solver=solver,
|
549
537
|
tags=tags,
|
inspect_ai/_eval/evalset.py
CHANGED
inspect_ai/_eval/run.py
CHANGED
@@ -63,7 +63,6 @@ async def eval_run(
|
|
63
63
|
eval_config: EvalConfig,
|
64
64
|
eval_sandbox: SandboxEnvironmentType | None,
|
65
65
|
recorder: Recorder,
|
66
|
-
header_only: bool,
|
67
66
|
epochs_reducer: list[ScoreReducer] | None = None,
|
68
67
|
solver: Solver | SolverSpec | None = None,
|
69
68
|
tags: list[str] | None = None,
|
@@ -213,7 +212,6 @@ async def eval_run(
|
|
213
212
|
eval_config=task_eval_config,
|
214
213
|
metadata=((metadata or {}) | (task.metadata or {})) or None,
|
215
214
|
recorder=recorder,
|
216
|
-
header_only=header_only,
|
217
215
|
)
|
218
216
|
await logger.init()
|
219
217
|
|
inspect_ai/_eval/task/log.py
CHANGED
@@ -75,7 +75,6 @@ class TaskLogger:
|
|
75
75
|
eval_config: EvalConfig,
|
76
76
|
metadata: dict[str, Any] | None,
|
77
77
|
recorder: Recorder,
|
78
|
-
header_only: bool,
|
79
78
|
) -> None:
|
80
79
|
# determine versions
|
81
80
|
git = git_context()
|
@@ -154,7 +153,6 @@ class TaskLogger:
|
|
154
153
|
|
155
154
|
# stack recorder and location
|
156
155
|
self.recorder = recorder
|
157
|
-
self.header_only = header_only
|
158
156
|
|
159
157
|
# number of samples logged
|
160
158
|
self._samples_completed = 0
|
@@ -240,7 +238,7 @@ class TaskLogger:
|
|
240
238
|
) -> EvalLog:
|
241
239
|
# finish and get log
|
242
240
|
log = await self.recorder.log_finish(
|
243
|
-
self.eval, status, stats, results, reductions, error
|
241
|
+
self.eval, status, stats, results, reductions, error
|
244
242
|
)
|
245
243
|
|
246
244
|
# cleanup the events db
|
inspect_ai/log/_recorders/eval.py
CHANGED
@@ -133,7 +133,6 @@ class EvalRecorder(FileRecorder):
|
|
133
133
|
results: EvalResults | None,
|
134
134
|
reductions: list[EvalSampleReductions] | None,
|
135
135
|
error: EvalError | None = None,
|
136
|
-
header_only: bool = False,
|
137
136
|
) -> EvalLog:
|
138
137
|
# get the key and log
|
139
138
|
key = self._log_file_key(eval)
|
@@ -175,7 +174,7 @@ class EvalRecorder(FileRecorder):
|
|
175
174
|
|
176
175
|
# flush and write the results
|
177
176
|
await log.flush()
|
178
|
-
return await log.close(
|
177
|
+
return await log.close()
|
179
178
|
|
180
179
|
@classmethod
|
181
180
|
@override
|
@@ -322,12 +321,12 @@ class ZipLogFile:
|
|
322
321
|
# re-open zip file w/ self.temp_file pointer at end
|
323
322
|
self._open()
|
324
323
|
|
325
|
-
async def close(self
|
324
|
+
async def close(self) -> EvalLog:
|
326
325
|
async with self._lock:
|
327
326
|
# read the log from the temp file then close it
|
328
327
|
try:
|
329
328
|
self._temp_file.seek(0)
|
330
|
-
return _read_log(self._temp_file, self._file
|
329
|
+
return _read_log(self._temp_file, self._file)
|
331
330
|
finally:
|
332
331
|
self._temp_file.close()
|
333
332
|
if self._zip:
|
inspect_ai/log/_recorders/json.py
CHANGED
@@ -96,7 +96,6 @@ class JSONRecorder(FileRecorder):
|
|
96
96
|
results: EvalResults | None,
|
97
97
|
reductions: list[EvalSampleReductions] | None,
|
98
98
|
error: EvalError | None = None,
|
99
|
-
header_only: bool = False,
|
100
99
|
) -> EvalLog:
|
101
100
|
log = self.data[self._log_file_key(spec)]
|
102
101
|
log.data.status = status
|
inspect_ai/model/_openai.py
CHANGED
inspect_ai/model/_providers/openai.py
CHANGED
@@ -27,6 +27,7 @@ from .._model_call import ModelCall
|
|
27
27
|
from .._model_output import ModelOutput
|
28
28
|
from .._openai import (
|
29
29
|
OpenAIAsyncHttpxClient,
|
30
|
+
is_codex,
|
30
31
|
is_computer_use_preview,
|
31
32
|
is_gpt,
|
32
33
|
is_o1,
|
@@ -88,8 +89,10 @@ class OpenAIAPI(ModelAPI):
|
|
88
89
|
|
89
90
|
# is this a model we use responses api by default for?
|
90
91
|
responses_model = (
|
91
|
-
self.is_o_series() and not self.is_o1_early()
|
92
|
-
|
92
|
+
(self.is_o_series() and not self.is_o1_early())
|
93
|
+
or self.is_computer_use_preview()
|
94
|
+
or self.is_codex()
|
95
|
+
)
|
93
96
|
|
94
97
|
# resolve whether we are forcing the responses api
|
95
98
|
self.responses_api = responses_api or responses_model
|
@@ -193,6 +196,9 @@ class OpenAIAPI(ModelAPI):
|
|
193
196
|
def is_computer_use_preview(self) -> bool:
|
194
197
|
return is_computer_use_preview(self.service_model_name())
|
195
198
|
|
199
|
+
def is_codex(self) -> bool:
|
200
|
+
return is_codex(self.service_model_name())
|
201
|
+
|
196
202
|
def is_gpt(self) -> bool:
|
197
203
|
return is_gpt(self.service_model_name())
|
198
204
|
|
{inspect_ai-0.3.101.dist-info → inspect_ai-0.3.102.dist-info}/RECORD
CHANGED
@@ -3,7 +3,7 @@ inspect_ai/__main__.py,sha256=oWX4YwDZDg3GS3-IG0yPGoSEOfSzWihELg7QmrUlxjM,67
|
|
3
3
|
inspect_ai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
inspect_ai/_cli/cache.py,sha256=RVGuBYwwk3c45JfyfcSFJ419etSsv3-Z7AyfQE-Hul0,3912
|
5
5
|
inspect_ai/_cli/common.py,sha256=WbAgGbLcxABcWoEWiGCWSbkkxYr2jmL9i0cyseWHmRA,4165
|
6
|
-
inspect_ai/_cli/eval.py,sha256=
|
6
|
+
inspect_ai/_cli/eval.py,sha256=zX1QcvPAi-Vjk8NQUVlG48sa3vowTXAjmv0kQogtGiw,39394
|
7
7
|
inspect_ai/_cli/info.py,sha256=QMxaTG9TmzW95EiLrOgkzubvavoR-VHxo3eV7ppmrzI,1789
|
8
8
|
inspect_ai/_cli/list.py,sha256=M8mazI8Zuq8Hp99YWKnxQd9UWx1Qi87zfXRzZYAAakk,2459
|
9
9
|
inspect_ai/_cli/log.py,sha256=Ko4TLqoy1jD3DoL50EkcQMVc16BzkjYo8qrZBakC4e8,5818
|
@@ -45,12 +45,12 @@ inspect_ai/_display/textual/widgets/transcript.py,sha256=fmCJwe1EZ7bjeB6DXakQ2l3
|
|
45
45
|
inspect_ai/_display/textual/widgets/vscode.py,sha256=SAIPO8VOkT_CFIfnCP_XxKixojdYXxMNdYU3Z2mq5Ek,1298
|
46
46
|
inspect_ai/_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
47
47
|
inspect_ai/_eval/context.py,sha256=mdYinWG2lcYkWLieT42suzUDyaQBVHosbaWTKA6Uu48,1407
|
48
|
-
inspect_ai/_eval/eval.py,sha256=
|
49
|
-
inspect_ai/_eval/evalset.py,sha256=
|
48
|
+
inspect_ai/_eval/eval.py,sha256=mj7Blv-bsd1qgswWWSbA1rphiyQpLEZIxmbQ1hAExC0,43555
|
49
|
+
inspect_ai/_eval/evalset.py,sha256=qzv0VFqHEp_9E2G3IwNRLqYD6gdKK3XzAwPJGbiABhU,25039
|
50
50
|
inspect_ai/_eval/list.py,sha256=VbZ-2EI6MqrXvCN7VTz21TQSoU5K5_Q0hqhxmj5A_m0,3744
|
51
51
|
inspect_ai/_eval/loader.py,sha256=dafv4TlQDqdvzPyrQrBsNiCzhvqjwmcVQzweX-AL1os,24805
|
52
52
|
inspect_ai/_eval/registry.py,sha256=8Cm-qyDB6Fthea8DUe-QES9plly_Pf2MUuCgeNQ3fOY,5303
|
53
|
-
inspect_ai/_eval/run.py,sha256=
|
53
|
+
inspect_ai/_eval/run.py,sha256=PYP3zi5OjPguBkMNS6Nq4KrbEBiSH7ebdBsKHpjYhOE,21231
|
54
54
|
inspect_ai/_eval/score.py,sha256=KodaNhMCE1KV8qS33zj7Q8I0LD080WRCb32tyg1956w,10443
|
55
55
|
inspect_ai/_eval/task/__init__.py,sha256=6FvojMW3yo36L7xDacppCHDxt6A8_tzj_ftg5bQ6eNk,199
|
56
56
|
inspect_ai/_eval/task/constants.py,sha256=quAKMw-4-3xKd1T_KwXCZvHYoKRXt1ZGuaHbBcWJwnA,72
|
@@ -58,7 +58,7 @@ inspect_ai/_eval/task/epochs.py,sha256=Ci7T6CQniSOTChv5Im2dCdSDrP-5hq19rV6iJ2uBc
|
|
58
58
|
inspect_ai/_eval/task/error.py,sha256=Vhqinfdf0eIrjn7kUY7-id8Kbdggr-fEFpAJeJrkJ1M,1244
|
59
59
|
inspect_ai/_eval/task/generate.py,sha256=yzeGlRUgIut-3OvF0xyx1ZjuJS61nR7thHV8tqv_aTE,2146
|
60
60
|
inspect_ai/_eval/task/images.py,sha256=nTzHizlyuPYumPH7gAOBSrNkTwTbAmZ7tKdzN7d_R2k,4035
|
61
|
-
inspect_ai/_eval/task/log.py,sha256=
|
61
|
+
inspect_ai/_eval/task/log.py,sha256=g5KACnK_QuOocjhhYgeWsSx6n9cahuX4l6hiYET25qE,11835
|
62
62
|
inspect_ai/_eval/task/resolved.py,sha256=LBVHEeq9N1fkRObmA2pnDE_l_EuH6n2Dg8-c8yCGT5U,1007
|
63
63
|
inspect_ai/_eval/task/results.py,sha256=x4weYRK2XGowfBG3f2msOeZQ_pxh230HTlw6kps33jw,17925
|
64
64
|
inspect_ai/_eval/task/run.py,sha256=VdqQnHqP_fWog_Re3L-kxN8MRAU41tU9xqfFILvNN7E,39120
|
@@ -591,10 +591,10 @@ inspect_ai/log/_tree.py,sha256=C817m_7-66ThyCX5K4nVA7AzYOgLXWlKMdTQ-ueNA-U,3232
|
|
591
591
|
inspect_ai/log/_util.py,sha256=j7jeqDendiCt12U_iaPQj8fLgTA44pk04ZM1tGQdau4,1699
|
592
592
|
inspect_ai/log/_recorders/__init__.py,sha256=qMm2y1HOzS499ZTXHOQExSN8PJ-I3LnH35icbP2m4VU,412
|
593
593
|
inspect_ai/log/_recorders/create.py,sha256=WB-fms0dBDHlTtTa_a_r0fFc6UPRvQZKZT7d_Inp-EU,1103
|
594
|
-
inspect_ai/log/_recorders/eval.py,sha256=
|
594
|
+
inspect_ai/log/_recorders/eval.py,sha256=tPO0Jh56d4VOPjjqCl1mHEQEkpdfhcC2rvA6BAdWedY,15972
|
595
595
|
inspect_ai/log/_recorders/file.py,sha256=aY1aGOwWfoXUD7c_imrcN9rqCFA-6xEjAef0HIPOaBM,3707
|
596
|
-
inspect_ai/log/_recorders/json.py,sha256=
|
597
|
-
inspect_ai/log/_recorders/recorder.py,sha256=
|
596
|
+
inspect_ai/log/_recorders/json.py,sha256=sF4cc-_TrUgRxngHwj0p7Rsjy67XTbWSH_SRCEqz9RQ,8782
|
597
|
+
inspect_ai/log/_recorders/recorder.py,sha256=Fn4lrfUJmxIhxy3hhQzpDjVABtCXQ7wtT0vRykzQ0YA,1787
|
598
598
|
inspect_ai/log/_recorders/types.py,sha256=cV7eFpXsbmXVw4r7xw1RdJC6Jltvw9xLXYPbFsYW7Oc,159
|
599
599
|
inspect_ai/log/_recorders/buffer/__init__.py,sha256=6DsRdnNl-ic-xJmnBE5i45ZP3eB4yAta9wxi5WFcbqc,367
|
600
600
|
inspect_ai/log/_recorders/buffer/buffer.py,sha256=rtLvaX7nSqNrWb-3CeSaOHwJgF1CzRgXFT_I1dDkM1k,945
|
@@ -611,7 +611,7 @@ inspect_ai/model/_generate_config.py,sha256=17QzzPlLvAxmC7uOPAikTaJoNecvZn_7xTgX
|
|
611
611
|
inspect_ai/model/_model.py,sha256=DOM65CcUcVi6np0fctSOW3RnroBZHeVgIK2e8tg-EzQ,53101
|
612
612
|
inspect_ai/model/_model_call.py,sha256=VJ8wnl9Y81JaiClBYM8eyt1jVb3n-yc6Dd88ofRiJDc,2234
|
613
613
|
inspect_ai/model/_model_output.py,sha256=1CLAt0JKsv9NYbN93-i5Fl0K035cxoO3bP_q19234Y8,8812
|
614
|
-
inspect_ai/model/_openai.py,sha256=
|
614
|
+
inspect_ai/model/_openai.py,sha256=iJV716nI_3boHEHQa_gMj6_Ri3eu5bshqTcU1jWpn4Y,24529
|
615
615
|
inspect_ai/model/_openai_computer_use.py,sha256=vbKkYLhqNuX16zuWfg5MaGp9H8URrPcLhKQ1pDsZtPo,5943
|
616
616
|
inspect_ai/model/_openai_responses.py,sha256=cychfKxyxxWltd_kDiX1oCfN6hvBH-A1_muxRwvGwvg,23854
|
617
617
|
inspect_ai/model/_openai_web_search.py,sha256=tKxoRHc8gHIt8kgIdp6eM_Ak62inGRTxdUe1JNJV0b8,1195
|
@@ -632,7 +632,7 @@ inspect_ai/model/_providers/mistral.py,sha256=RLJ0ymOHxDWyVJfF-7UeskkjPg7DefBipR
|
|
632
632
|
inspect_ai/model/_providers/mockllm.py,sha256=gL9f-f5TOdE4a0GVENr3cOIIp2kv8zVXWPZ608rouGk,2440
|
633
633
|
inspect_ai/model/_providers/none.py,sha256=6qLbZpHSoEZaaxFO7luieFjqig2Ju8Fu00DlRngAry8,935
|
634
634
|
inspect_ai/model/_providers/ollama.py,sha256=eWhsVoZ8k_3zYpdDoZWcPN9rGly6Ha4O49N99EJfycs,591
|
635
|
-
inspect_ai/model/_providers/openai.py,sha256=
|
635
|
+
inspect_ai/model/_providers/openai.py,sha256=WKf_54AIl6poQKaSv3QbLTQWDsNG1H9vYKfR4NTCQUY,13759
|
636
636
|
inspect_ai/model/_providers/openai_compatible.py,sha256=OO7VoZA8RQb6y6iaRXMHabCOG4wWQfhsxJLD5wB38co,6592
|
637
637
|
inspect_ai/model/_providers/openai_o1.py,sha256=ahdXt2TFtPTdDvSGVQw7EaVindfbFbY2pLZrrB45rFg,13305
|
638
638
|
inspect_ai/model/_providers/openai_responses.py,sha256=hHnx5fgok_doLHmLVrfib6VYkrqUezwD7HlkIrCaKqY,6764
|
@@ -758,9 +758,9 @@ inspect_ai/util/_sandbox/docker/internal.py,sha256=c8X8TLrBPOvsfnq5TkMlb_bzTALyc
|
|
758
758
|
inspect_ai/util/_sandbox/docker/prereqs.py,sha256=0j6_OauBBnVlpBleADcZavIAAQZy4WewVjbRn9c0stg,3355
|
759
759
|
inspect_ai/util/_sandbox/docker/service.py,sha256=hhHIWH1VDFLwehdGd19aUBD_VKfDO3GCPxpw1HSwVQk,2437
|
760
760
|
inspect_ai/util/_sandbox/docker/util.py,sha256=EeInihCNXgUWxaqZ4dNOJd719kXL2_jr63QCoXn68vA,3154
|
761
|
-
inspect_ai-0.3.
|
762
|
-
inspect_ai-0.3.
|
763
|
-
inspect_ai-0.3.
|
764
|
-
inspect_ai-0.3.
|
765
|
-
inspect_ai-0.3.
|
766
|
-
inspect_ai-0.3.
|
761
|
+
inspect_ai-0.3.102.dist-info/licenses/LICENSE,sha256=xZPCr8gTiFIerrA_DRpLAbw-UUftnLFsHxKeW-NTtq8,1081
|
762
|
+
inspect_ai-0.3.102.dist-info/METADATA,sha256=oeFq80JBy8nfGBlfCzuBmSlBfy4ybvQ1z-E1WB1ItY0,5460
|
763
|
+
inspect_ai-0.3.102.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
764
|
+
inspect_ai-0.3.102.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
|
765
|
+
inspect_ai-0.3.102.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
|
766
|
+
inspect_ai-0.3.102.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|