inspect-ai 0.3.97__py3-none-any.whl → 0.3.98__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_eval/run.py +6 -1
- inspect_ai/dataset/_dataset.py +0 -1
- inspect_ai/model/_providers/google.py +6 -0
- {inspect_ai-0.3.97.dist-info → inspect_ai-0.3.98.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.97.dist-info → inspect_ai-0.3.98.dist-info}/RECORD +9 -9
- {inspect_ai-0.3.97.dist-info → inspect_ai-0.3.98.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.97.dist-info → inspect_ai-0.3.98.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.97.dist-info → inspect_ai-0.3.98.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.97.dist-info → inspect_ai-0.3.98.dist-info}/top_level.txt +0 -0
inspect_ai/_eval/run.py
CHANGED
@@ -475,7 +475,12 @@ async def startup_sandbox_environments(
|
|
475
475
|
sandboxenvs: Set[TaskSandboxEnvironment] = set()
|
476
476
|
for task in tasks:
|
477
477
|
# resolve each sample and add to sandboxenvs
|
478
|
-
|
478
|
+
resolved_task_sample_ids = resolve_task_sample_ids(
|
479
|
+
task.task.name, config.sample_id
|
480
|
+
)
|
481
|
+
dataset = slice_dataset(
|
482
|
+
task.task.dataset, config.limit, resolved_task_sample_ids
|
483
|
+
)
|
479
484
|
for sample in dataset:
|
480
485
|
sandbox = await resolve_sandbox_for_task_and_sample(
|
481
486
|
eval_sandbox, task.task, sample
|
inspect_ai/dataset/_dataset.py
CHANGED
@@ -51,7 +51,6 @@ class Sample(BaseModel):
|
|
51
51
|
or narrative text to be used by a model grader.
|
52
52
|
id: Optional. Unique identifier for sample.
|
53
53
|
metadata: Optional. Arbitrary metadata associated with the sample.
|
54
|
-
sandbox (SandboxEnvironmentType | None): Sandbox environment type (or optionally a str or tuple with a shorthand spec)
|
55
54
|
sandbox: Optional. Sandbox specification for this sample.
|
56
55
|
files: Optional. Files that go along with the sample (copied to
|
57
56
|
SandboxEnvironment). Files can be paths, inline text, or inline binary (base64 encoded data URL).
|
inspect_ai/model/_providers/google.py
CHANGED
@@ -350,6 +350,12 @@ class GoogleGenAIAPI(ModelAPI):
|
|
350
350
|
self.is_gemini() and not self.is_gemini_1_5() and not self.is_gemini_2_0()
|
351
351
|
)
|
352
352
|
if has_thinking_config:
|
353
|
+
if config.reasoning_tokens == 0:
|
354
|
+
# When reasoning_tokens is set to zero, we disable reasoning and return None.
|
355
|
+
# We cannot return a ThinkingConfig with reasoning_tokens set to 0,
|
356
|
+
# as this will cause the Gemini API to return a 400 INVALID_ARGUMENT error.
|
357
|
+
return None
|
358
|
+
|
353
359
|
return ThinkingConfig(
|
354
360
|
include_thoughts=True, thinking_budget=config.reasoning_tokens
|
355
361
|
)
|
{inspect_ai-0.3.97.dist-info → inspect_ai-0.3.98.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: inspect_ai
|
3
|
-
Version: 0.3.97
|
3
|
+
Version: 0.3.98
|
4
4
|
Summary: Framework for large language model evaluations
|
5
5
|
Author: UK AI Security Institute
|
6
6
|
License: MIT License
|
@@ -49,7 +49,7 @@ Requires-Dist: semver>=3.0.0
|
|
49
49
|
Requires-Dist: shortuuid
|
50
50
|
Requires-Dist: sniffio
|
51
51
|
Requires-Dist: tenacity
|
52
|
-
Requires-Dist: textual
|
52
|
+
Requires-Dist: textual<v3.0.0,>=0.86.2
|
53
53
|
Requires-Dist: typing_extensions>=4.9.0
|
54
54
|
Requires-Dist: zipp>=3.19.1
|
55
55
|
Provides-Extra: dev
|
{inspect_ai-0.3.97.dist-info → inspect_ai-0.3.98.dist-info}/RECORD
CHANGED
@@ -50,7 +50,7 @@ inspect_ai/_eval/evalset.py,sha256=Gvj22yy5WI_SOJYfVdtC-qmMjzdBOFTSq_RZznIQnZo,2
|
|
50
50
|
inspect_ai/_eval/list.py,sha256=VbZ-2EI6MqrXvCN7VTz21TQSoU5K5_Q0hqhxmj5A_m0,3744
|
51
51
|
inspect_ai/_eval/loader.py,sha256=dafv4TlQDqdvzPyrQrBsNiCzhvqjwmcVQzweX-AL1os,24805
|
52
52
|
inspect_ai/_eval/registry.py,sha256=8Cm-qyDB6Fthea8DUe-QES9plly_Pf2MUuCgeNQ3fOY,5303
|
53
|
-
inspect_ai/_eval/run.py,sha256=
|
53
|
+
inspect_ai/_eval/run.py,sha256=QOu4OMaQey--c3OPRxEtg-01jUfKiMFUDcpweMW8rR0,20731
|
54
54
|
inspect_ai/_eval/score.py,sha256=ns5X3NJBfQ9bcue54wrLIdc2674ofcua1ZtX3ZOwkOI,10477
|
55
55
|
inspect_ai/_eval/task/__init__.py,sha256=6FvojMW3yo36L7xDacppCHDxt6A8_tzj_ftg5bQ6eNk,199
|
56
56
|
inspect_ai/_eval/task/constants.py,sha256=quAKMw-4-3xKd1T_KwXCZvHYoKRXt1ZGuaHbBcWJwnA,72
|
@@ -549,7 +549,7 @@ inspect_ai/approval/_human/manager.py,sha256=Igae35VS99TejSWUShNwFuVnmhwByK30H84
|
|
549
549
|
inspect_ai/approval/_human/panel.py,sha256=UaO309bn7zMDV1I5GJcrZThhU8c8D4gOW7bwPRPPdkc,7672
|
550
550
|
inspect_ai/approval/_human/util.py,sha256=DPxpA9_pzeoQyHpGtRE3hx8sGV7MyrlElCBvsh1FNgA,1996
|
551
551
|
inspect_ai/dataset/__init__.py,sha256=4uTSHpN_ccdtbZulUMDetSSP-dXRkFGYsa2FzA5mLEw,534
|
552
|
-
inspect_ai/dataset/_dataset.py,sha256=
|
552
|
+
inspect_ai/dataset/_dataset.py,sha256=CrDsUaoyJg5ijApVPxQgCiZRbkO-QA6xgv39q3LzQRc,11936
|
553
553
|
inspect_ai/dataset/_util.py,sha256=u2IDIJdsa5lOsC67SNWZo0mRSey5DJdxS4DCCxSx7kE,7857
|
554
554
|
inspect_ai/dataset/_examples/bias_detection.jsonl,sha256=ufXUZMjsJhY2_lJ9j_iJ6w0fiuyw7TQmJeMFoOqKYzM,20756
|
555
555
|
inspect_ai/dataset/_examples/biology_qa.jsonl,sha256=Hfkr8XRT2x6Cgjd-zKbWGfFD-ZEfi566FqLX4w9USKs,2009
|
@@ -609,7 +609,7 @@ inspect_ai/model/_providers/anthropic.py,sha256=8R7lX1lKdVmTRksTpQ7RIFU_gj6rB5v0
|
|
609
609
|
inspect_ai/model/_providers/azureai.py,sha256=EUtgc-qyIqzV0Oy3C0ztKgYU4ID4z5xwuOxtoRn8WoY,14607
|
610
610
|
inspect_ai/model/_providers/bedrock.py,sha256=G252v6gUXtT56M4JaLLY7tEw2AJVQFucjeFgv0okhgo,23999
|
611
611
|
inspect_ai/model/_providers/cloudflare.py,sha256=9yHfA5qbKWjzOfOzCJ_u8CZsH_U7AolAWLxvLBXKrhM,2375
|
612
|
-
inspect_ai/model/_providers/google.py,sha256=
|
612
|
+
inspect_ai/model/_providers/google.py,sha256=yp_UWN2JHbeqic9MV8nDMtkxyb0Z4pwHSwJ9ygTRXy8,32603
|
613
613
|
inspect_ai/model/_providers/grok.py,sha256=iAPXmZMR7VWPq6EIwRsoUJr_TR6b5kTt-Fkba1pogGQ,1267
|
614
614
|
inspect_ai/model/_providers/groq.py,sha256=q9o4sy0uUyLQbSThB-MMTmSc5AtKmb6GJfgHBpf5amM,12262
|
615
615
|
inspect_ai/model/_providers/hf.py,sha256=jyXi4qyq2hdsp1waB2ON5m8f9mpE2h1GFD7Tu_phCEo,19115
|
@@ -744,9 +744,9 @@ inspect_ai/util/_sandbox/docker/internal.py,sha256=c8X8TLrBPOvsfnq5TkMlb_bzTALyc
|
|
744
744
|
inspect_ai/util/_sandbox/docker/prereqs.py,sha256=0j6_OauBBnVlpBleADcZavIAAQZy4WewVjbRn9c0stg,3355
|
745
745
|
inspect_ai/util/_sandbox/docker/service.py,sha256=hhHIWH1VDFLwehdGd19aUBD_VKfDO3GCPxpw1HSwVQk,2437
|
746
746
|
inspect_ai/util/_sandbox/docker/util.py,sha256=EeInihCNXgUWxaqZ4dNOJd719kXL2_jr63QCoXn68vA,3154
|
747
|
-
inspect_ai-0.3.97.dist-info/licenses/LICENSE,sha256=xZPCr8gTiFIerrA_DRpLAbw-UUftnLFsHxKeW-NTtq8,1081
|
748
|
-
inspect_ai-0.3.
|
749
|
-
inspect_ai-0.3.97.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
|
750
|
-
inspect_ai-0.3.97.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
|
751
|
-
inspect_ai-0.3.97.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
|
752
|
-
inspect_ai-0.3.97.dist-info/RECORD,,
|
747
|
+
inspect_ai-0.3.98.dist-info/licenses/LICENSE,sha256=xZPCr8gTiFIerrA_DRpLAbw-UUftnLFsHxKeW-NTtq8,1081
|
748
|
+
inspect_ai-0.3.98.dist-info/METADATA,sha256=AotZ1r5SK0mv2OC_fb3_1VKl9IowgTwrcTobo9f-z3s,5446
|
749
|
+
inspect_ai-0.3.98.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
|
750
|
+
inspect_ai-0.3.98.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
|
751
|
+
inspect_ai-0.3.98.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
|
752
|
+
inspect_ai-0.3.98.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|