inspect-ai 0.3.97__py3-none-any.whl → 0.3.98__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
inspect_ai/_eval/run.py CHANGED
@@ -475,7 +475,12 @@ async def startup_sandbox_environments(
475
475
  sandboxenvs: Set[TaskSandboxEnvironment] = set()
476
476
  for task in tasks:
477
477
  # resolve each sample and add to sandboxenvs
478
- dataset = slice_dataset(task.task.dataset, config.limit, config.sample_id)
478
+ resolved_task_sample_ids = resolve_task_sample_ids(
479
+ task.task.name, config.sample_id
480
+ )
481
+ dataset = slice_dataset(
482
+ task.task.dataset, config.limit, resolved_task_sample_ids
483
+ )
479
484
  for sample in dataset:
480
485
  sandbox = await resolve_sandbox_for_task_and_sample(
481
486
  eval_sandbox, task.task, sample
@@ -51,7 +51,6 @@ class Sample(BaseModel):
51
51
  or narrative text to be used by a model grader.
52
52
  id: Optional. Unique identifier for sample.
53
53
  metadata: Optional. Arbitrary metadata associated with the sample.
54
- sandbox (SandboxEnvironmentType | None): Sandbox environment type (or optionally a str or tuple with a shorthand spec)
55
54
  sandbox: Optional. Sandbox specification for this sample.
56
55
  files: Optional. Files that go along with the sample (copied to
57
56
  SandboxEnvironment). Files can be paths, inline text, or inline binary (base64 encoded data URL).
@@ -350,6 +350,12 @@ class GoogleGenAIAPI(ModelAPI):
350
350
  self.is_gemini() and not self.is_gemini_1_5() and not self.is_gemini_2_0()
351
351
  )
352
352
  if has_thinking_config:
353
+ if config.reasoning_tokens == 0:
354
+ # When reasoning_tokens is set to zero, we disable reasoning and return None.
355
+ # We cannot return a ThinkingConfig with reasoning_tokens set to 0,
356
+ # as this will cause the Gemini API to return a 400 INVALID_ARGUMENT error.
357
+ return None
358
+
353
359
  return ThinkingConfig(
354
360
  include_thoughts=True, thinking_budget=config.reasoning_tokens
355
361
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: inspect_ai
3
- Version: 0.3.97
3
+ Version: 0.3.98
4
4
  Summary: Framework for large language model evaluations
5
5
  Author: UK AI Security Institute
6
6
  License: MIT License
@@ -49,7 +49,7 @@ Requires-Dist: semver>=3.0.0
49
49
  Requires-Dist: shortuuid
50
50
  Requires-Dist: sniffio
51
51
  Requires-Dist: tenacity
52
- Requires-Dist: textual>=0.86.2
52
+ Requires-Dist: textual<v3.0.0,>=0.86.2
53
53
  Requires-Dist: typing_extensions>=4.9.0
54
54
  Requires-Dist: zipp>=3.19.1
55
55
  Provides-Extra: dev
@@ -50,7 +50,7 @@ inspect_ai/_eval/evalset.py,sha256=Gvj22yy5WI_SOJYfVdtC-qmMjzdBOFTSq_RZznIQnZo,2
50
50
  inspect_ai/_eval/list.py,sha256=VbZ-2EI6MqrXvCN7VTz21TQSoU5K5_Q0hqhxmj5A_m0,3744
51
51
  inspect_ai/_eval/loader.py,sha256=dafv4TlQDqdvzPyrQrBsNiCzhvqjwmcVQzweX-AL1os,24805
52
52
  inspect_ai/_eval/registry.py,sha256=8Cm-qyDB6Fthea8DUe-QES9plly_Pf2MUuCgeNQ3fOY,5303
53
- inspect_ai/_eval/run.py,sha256=cv54aTFOi8nMWSG0Y40Z9o07V3nA-0_9uufgQDFP89k,20586
53
+ inspect_ai/_eval/run.py,sha256=QOu4OMaQey--c3OPRxEtg-01jUfKiMFUDcpweMW8rR0,20731
54
54
  inspect_ai/_eval/score.py,sha256=ns5X3NJBfQ9bcue54wrLIdc2674ofcua1ZtX3ZOwkOI,10477
55
55
  inspect_ai/_eval/task/__init__.py,sha256=6FvojMW3yo36L7xDacppCHDxt6A8_tzj_ftg5bQ6eNk,199
56
56
  inspect_ai/_eval/task/constants.py,sha256=quAKMw-4-3xKd1T_KwXCZvHYoKRXt1ZGuaHbBcWJwnA,72
@@ -549,7 +549,7 @@ inspect_ai/approval/_human/manager.py,sha256=Igae35VS99TejSWUShNwFuVnmhwByK30H84
549
549
  inspect_ai/approval/_human/panel.py,sha256=UaO309bn7zMDV1I5GJcrZThhU8c8D4gOW7bwPRPPdkc,7672
550
550
  inspect_ai/approval/_human/util.py,sha256=DPxpA9_pzeoQyHpGtRE3hx8sGV7MyrlElCBvsh1FNgA,1996
551
551
  inspect_ai/dataset/__init__.py,sha256=4uTSHpN_ccdtbZulUMDetSSP-dXRkFGYsa2FzA5mLEw,534
552
- inspect_ai/dataset/_dataset.py,sha256=V2tS5vuUgsplUD_CUYG5vXsi5kqnvVp4XT7pfw9zj4Q,12071
552
+ inspect_ai/dataset/_dataset.py,sha256=CrDsUaoyJg5ijApVPxQgCiZRbkO-QA6xgv39q3LzQRc,11936
553
553
  inspect_ai/dataset/_util.py,sha256=u2IDIJdsa5lOsC67SNWZo0mRSey5DJdxS4DCCxSx7kE,7857
554
554
  inspect_ai/dataset/_examples/bias_detection.jsonl,sha256=ufXUZMjsJhY2_lJ9j_iJ6w0fiuyw7TQmJeMFoOqKYzM,20756
555
555
  inspect_ai/dataset/_examples/biology_qa.jsonl,sha256=Hfkr8XRT2x6Cgjd-zKbWGfFD-ZEfi566FqLX4w9USKs,2009
@@ -609,7 +609,7 @@ inspect_ai/model/_providers/anthropic.py,sha256=8R7lX1lKdVmTRksTpQ7RIFU_gj6rB5v0
609
609
  inspect_ai/model/_providers/azureai.py,sha256=EUtgc-qyIqzV0Oy3C0ztKgYU4ID4z5xwuOxtoRn8WoY,14607
610
610
  inspect_ai/model/_providers/bedrock.py,sha256=G252v6gUXtT56M4JaLLY7tEw2AJVQFucjeFgv0okhgo,23999
611
611
  inspect_ai/model/_providers/cloudflare.py,sha256=9yHfA5qbKWjzOfOzCJ_u8CZsH_U7AolAWLxvLBXKrhM,2375
612
- inspect_ai/model/_providers/google.py,sha256=qL3cK1HbEnFrWAXR9SR7DpF5LfXjEK95TEfX5ga4OYM,32259
612
+ inspect_ai/model/_providers/google.py,sha256=yp_UWN2JHbeqic9MV8nDMtkxyb0Z4pwHSwJ9ygTRXy8,32603
613
613
  inspect_ai/model/_providers/grok.py,sha256=iAPXmZMR7VWPq6EIwRsoUJr_TR6b5kTt-Fkba1pogGQ,1267
614
614
  inspect_ai/model/_providers/groq.py,sha256=q9o4sy0uUyLQbSThB-MMTmSc5AtKmb6GJfgHBpf5amM,12262
615
615
  inspect_ai/model/_providers/hf.py,sha256=jyXi4qyq2hdsp1waB2ON5m8f9mpE2h1GFD7Tu_phCEo,19115
@@ -744,9 +744,9 @@ inspect_ai/util/_sandbox/docker/internal.py,sha256=c8X8TLrBPOvsfnq5TkMlb_bzTALyc
744
744
  inspect_ai/util/_sandbox/docker/prereqs.py,sha256=0j6_OauBBnVlpBleADcZavIAAQZy4WewVjbRn9c0stg,3355
745
745
  inspect_ai/util/_sandbox/docker/service.py,sha256=hhHIWH1VDFLwehdGd19aUBD_VKfDO3GCPxpw1HSwVQk,2437
746
746
  inspect_ai/util/_sandbox/docker/util.py,sha256=EeInihCNXgUWxaqZ4dNOJd719kXL2_jr63QCoXn68vA,3154
747
- inspect_ai-0.3.97.dist-info/licenses/LICENSE,sha256=xZPCr8gTiFIerrA_DRpLAbw-UUftnLFsHxKeW-NTtq8,1081
748
- inspect_ai-0.3.97.dist-info/METADATA,sha256=1jgLRbZ0oobGShNJf8VmtU4FAJv9D-OSjbclYCMs9i8,5438
749
- inspect_ai-0.3.97.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
750
- inspect_ai-0.3.97.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
751
- inspect_ai-0.3.97.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
752
- inspect_ai-0.3.97.dist-info/RECORD,,
747
+ inspect_ai-0.3.98.dist-info/licenses/LICENSE,sha256=xZPCr8gTiFIerrA_DRpLAbw-UUftnLFsHxKeW-NTtq8,1081
748
+ inspect_ai-0.3.98.dist-info/METADATA,sha256=AotZ1r5SK0mv2OC_fb3_1VKl9IowgTwrcTobo9f-z3s,5446
749
+ inspect_ai-0.3.98.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
750
+ inspect_ai-0.3.98.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
751
+ inspect_ai-0.3.98.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
752
+ inspect_ai-0.3.98.dist-info/RECORD,,