inspect-ai 0.3.52__py3-none-any.whl → 0.3.53__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (46)
  1. inspect_ai/_cli/eval.py +29 -0
  2. inspect_ai/_display/core/progress.py +9 -3
  3. inspect_ai/_display/core/results.py +8 -4
  4. inspect_ai/_display/textual/widgets/task_detail.py +3 -0
  5. inspect_ai/_display/textual/widgets/tasks.py +86 -5
  6. inspect_ai/_eval/eval.py +16 -0
  7. inspect_ai/_eval/evalset.py +4 -0
  8. inspect_ai/_eval/registry.py +2 -2
  9. inspect_ai/_eval/task/results.py +22 -4
  10. inspect_ai/_eval/task/run.py +14 -10
  11. inspect_ai/_eval/task/sandbox.py +72 -43
  12. inspect_ai/_eval/task/task.py +4 -0
  13. inspect_ai/_eval/task/util.py +2 -0
  14. inspect_ai/_view/www/App.css +13 -0
  15. inspect_ai/_view/www/dist/assets/index.css +13 -0
  16. inspect_ai/_view/www/dist/assets/index.js +80 -43
  17. inspect_ai/_view/www/src/App.mjs +31 -6
  18. inspect_ai/_view/www/src/Types.mjs +6 -0
  19. inspect_ai/_view/www/src/components/JsonPanel.mjs +11 -17
  20. inspect_ai/_view/www/src/components/MessageContent.mjs +9 -2
  21. inspect_ai/_view/www/src/components/Tools.mjs +46 -18
  22. inspect_ai/_view/www/src/navbar/Navbar.mjs +12 -0
  23. inspect_ai/_view/www/src/samples/SampleList.mjs +2 -2
  24. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +2 -2
  25. inspect_ai/log/_log.py +3 -0
  26. inspect_ai/log/_recorders/eval.py +8 -7
  27. inspect_ai/model/_generate_config.py +6 -0
  28. inspect_ai/model/_providers/azureai.py +1 -1
  29. inspect_ai/model/_providers/bedrock.py +17 -1
  30. inspect_ai/model/_providers/hf.py +1 -1
  31. inspect_ai/model/_providers/openai.py +32 -8
  32. inspect_ai/model/_providers/providers.py +1 -1
  33. inspect_ai/model/_providers/vllm.py +1 -1
  34. inspect_ai/util/_sandbox/context.py +1 -2
  35. inspect_ai/util/_sandbox/docker/config.py +8 -10
  36. inspect_ai/util/_sandbox/docker/docker.py +9 -5
  37. inspect_ai/util/_sandbox/docker/util.py +3 -3
  38. inspect_ai/util/_sandbox/environment.py +7 -2
  39. inspect_ai/util/_sandbox/limits.py +1 -1
  40. inspect_ai/util/_sandbox/local.py +8 -9
  41. {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.53.dist-info}/METADATA +1 -3
  42. {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.53.dist-info}/RECORD +46 -46
  43. {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.53.dist-info}/LICENSE +0 -0
  44. {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.53.dist-info}/WHEEL +0 -0
  45. {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.53.dist-info}/entry_points.txt +0 -0
  46. {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.53.dist-info}/top_level.txt +0 -0
inspect_ai/_view/www/src/components/Tools.mjs CHANGED
@@ -63,26 +63,12 @@ export const ToolCallView = ({
   output,
   mode,
 }) => {
-  const icon =
-    mode === "compact"
-      ? ""
-      : html`<i
-          class="bi bi-tools"
-          style=${{
-            marginRight: "0.2rem",
-            opacity: "0.4",
-          }}
-        ></i>`;
-  const codeIndent = mode === "compact" ? "" : "";
   return html`<div>
-    ${icon}
-    ${!view || view.title
-      ? html`<code style=${{ fontSize: FontSize.small }}
-          >${view?.title || functionCall}</code
-        >`
+    ${mode !== "compact" && (!view || view.title)
+      ? html`<${ToolTitle} title=${view?.title || functionCall} />`
       : ""}
     <div>
-      <div style=${{ marginLeft: `${codeIndent}` }}>
+      <div>
         <${ToolInput}
           type=${inputType}
           contents=${input}
@@ -92,7 +78,7 @@ export const ToolCallView = ({
         ${output
           ? html`
         <${ExpandablePanel} collapse=${true} border=${true} lines=${15}>
-        <${MessageContent} contents=${output} />
+        <${MessageContent} contents=${normalizeContent(output)} />
         </${ExpandablePanel}>`
           : ""}
       </div>
@@ -100,6 +86,48 @@ export const ToolCallView = ({
   </div>`;
 };
 
+/**
+ * Renders the ToolCallView component.
+ *
+ * @param {Object} props - The parameters for the component.
+ * @param {string} props.title - The title for the tool call
+ * @returns {import("preact").JSX.Element} The SampleTranscript component.
+ */
+const ToolTitle = ({ title }) => {
+  return html` <i
+    class="bi bi-tools"
+    style=${{
+      marginRight: "0.2rem",
+      opacity: "0.4",
+    }}
+  ></i>
+  <code style=${{ fontSize: FontSize.small }}>${title}</code>`;
+};
+
+/**
+ * Renders the ToolCallView component.
+ *
+ * @param {string | number | boolean | (import("../types/log").ContentText | import("../types/log").ContentImage)[]} output - The tool output
+ * @returns {(import("../Types.mjs").ContentTool | import("../types/log").ContentText | import("../types/log").ContentImage)[]} The SampleTranscript component.
+ */
+const normalizeContent = (output) => {
+  if (Array.isArray(output)) {
+    return output;
+  } else {
+    return [
+      {
+        type: "tool",
+        content: [
+          {
+            type: "text",
+            text: String(output),
+          },
+        ],
+      },
+    ];
+  }
+};
+
 /**
  * Renders the ToolInput component.
  *
inspect_ai/_view/www/src/navbar/Navbar.mjs CHANGED
@@ -255,6 +255,8 @@ const ResultsPanel = ({ results }) => {
       justifyContent: "end",
       height: "100%",
       alignItems: "center",
+      maxHeight: "15em",
+      overflow: "scroll",
     }}
   >
     ${metrics.map((metric, i) => {
@@ -273,6 +275,8 @@ const ResultsPanel = ({ results }) => {
       marginTop: "0.2rem",
       paddingBottom: "0.4rem",
       rowGap: "1em",
+      maxHeight: "15em",
+      overflow: "scroll",
     }}
   >
     ${results?.scores?.map((score, index) => {
@@ -285,6 +289,14 @@ const ResultsPanel = ({ results }) => {
   }
 };
 
+/** Renders a Vertial Metric
+ *
+ * @param {Object} props - The parameters for the component.
+ * @param {import("../types/log").EvalMetric} props.metric - The metric
+ * @param {boolean} props.isFirst - Whether this is the first metric
+ *
+ * @returns {import("preact").JSX.Element} The TranscriptView component.
+ */
 const VerticalMetric = ({ metric, isFirst }) => {
   const reducer_component = metric.reducer
     ? html` <div
inspect_ai/_view/www/src/samples/SampleList.mjs CHANGED
@@ -145,7 +145,7 @@ export const SampleList = (props) => {
   );
 
   const listStyle = { ...style, flex: "1", overflowY: "auto", outline: "none" };
-  const { limit, answer } = gridColumns(sampleDescriptor);
+  const { limit, answer, target } = gridColumns(sampleDescriptor);
 
   const headerRow = html`<div
     style=${{
@@ -161,7 +161,7 @@ export const SampleList = (props) => {
   >
     <div>Id</div>
     <div>Input</div>
-    <div>Target</div>
+    <div>${target !== "0" ? "Target" : ""}</div>
     <div>${answer !== "0" ? "Answer" : ""}</div>
     <div>${limit !== "0" ? "Limit" : ""}</div>
     <div style=${{ justifySelf: "center" }}>Score</div>
inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs CHANGED
@@ -29,10 +29,10 @@ export const ToolEventView = ({ id, event, style, depth }) => {
     return e.event === "approval";
   });
 
-  const title = `Tool: ${event.function}`;
+  const title = `Tool: ${event.view?.title || event.function}`;
   return html`
   <${EventPanel} id=${id} title="${title}" subTitle=${formatDateTime(new Date(event.timestamp))} icon=${ApplicationIcons.solvers.use_tools} style=${style}>
-    <div name="Summary" style=${{ margin: "0.5em 0" }}>
+    <div name="Summary" style=${{ margin: "0.5em 0", width: "100%" }}>
       <${ToolCallView}
         functionCall=${functionCall}
         input=${input}
inspect_ai/log/_log.py CHANGED
@@ -79,6 +79,9 @@ class EvalConfig(BaseModel):
     max_subprocesses: int | None = Field(default=None)
     """Maximum number of subprocesses to run concurrently."""
 
+    max_sandboxes: int | None = Field(default=None)
+    """Maximum number of sandboxes to run concurrently."""
+
     sandbox_cleanup: bool | None = Field(default=None)
     """Cleanup sandbox environments after task completes."""
 
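The new `max_sandboxes` option slots in alongside `max_subprocesses` and, judging by the `_cli/eval.py` and `_eval/eval.py` entries in the file list above, is also plumbed through the CLI and `eval()`. A minimal sketch of setting it directly on the config, assuming `EvalConfig` is re-exported from `inspect_ai.log` as in prior releases:

```python
from inspect_ai.log import EvalConfig  # assumption: public re-export

# cap the number of concurrently running sandbox environments at 20;
# the default of None defers to the provider's default_concurrency()
config = EvalConfig(max_sandboxes=20)
print(config.max_sandboxes)  # 20
```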
inspect_ai/log/_recorders/eval.py CHANGED
@@ -362,13 +362,14 @@ class ZipLogFile:
                 f"Error occurred during async write to {self._file}: {ex}. Falling back to sync write."
             )
 
-        # write sync if we need to
-        if not written:
-            with file(self._file, "wb") as f:
-                f.write(log_bytes)
-
-        # re-open zip file w/ self.temp_file pointer at end
-        self._open()
+        try:
+            # write sync if we need to
+            if not written:
+                with file(self._file, "wb") as f:
+                    f.write(log_bytes)
+        finally:
+            # re-open zip file w/ self.temp_file pointer at end
+            self._open()
 
     async def close(self) -> EvalLog:
         async with self._lock:
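The rewrite wraps the synchronous fallback in `try`/`finally` so the zip file is reopened even when the fallback write itself raises. A minimal sketch of the control-flow change, using hypothetical stand-ins for the write and `self._open()`:

```python
def flush_and_reopen(written: bool, write_sync, reopen) -> None:
    # write_sync and reopen are hypothetical stand-ins for the calls above
    try:
        if not written:
            write_sync()  # may raise (e.g. disk full)
    finally:
        reopen()  # previously skipped if write_sync() raised,
                  # leaving the zip handle closed for later appends
```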
inspect_ai/model/_generate_config.py CHANGED
@@ -72,6 +72,9 @@ class GenerateConfigArgs(TypedDict, total=False):
     cache_prompt: Literal["auto"] | bool | None
     """Whether to cache the prompt prefix. Defaults to "auto", which will enable caching for requests with tools. Anthropic only."""
 
+    reasoning_effort: Literal["low", "medium", "high"] | None
+    """Constrains effort on reasoning for reasoning models. Open AI o1 models only."""
+
 
 class GenerateConfig(BaseModel):
     """Base class for model generation configs."""
@@ -139,6 +142,9 @@ class GenerateConfig(BaseModel):
     cache_prompt: Literal["auto"] | bool | None = Field(default=None)
     """Whether to cache the prompt prefix. Defaults to "auto", which will enable caching for requests with tools. Anthropic only."""
 
+    reasoning_effort: Literal["low", "medium", "high"] | None = Field(default=None)
+    """Constrains effort on reasoning for reasoning models. Open AI o1 models only."""
+
     def merge(
         self, other: Union["GenerateConfig", GenerateConfigArgs]
     ) -> "GenerateConfig":
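A minimal usage sketch, assuming the new field participates in `merge()` like the neighboring options (non-`None` values from the other config win):

```python
from inspect_ai.model import GenerateConfig

base = GenerateConfig(temperature=0.0)
merged = base.merge(GenerateConfig(reasoning_effort="high"))
print(merged.reasoning_effort)  # "high"
print(merged.temperature)       # 0.0 -- fields left unset don't clobber
```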
inspect_ai/model/_providers/azureai.py CHANGED
@@ -93,7 +93,7 @@ class AzureAIAPI(ModelAPI):
         def collect_model_arg(name: str) -> Any | None:
             nonlocal model_args
             value = model_args.get(name, None)
-            if value:
+            if value is not None:
                 model_args.pop(name)
             return value
 
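The identical `if value:` → `if value is not None:` fix recurs in `hf.py` and `vllm.py` below. The old truthiness test silently skipped falsy-but-valid arguments, so values like `0.0` or `False` were never popped out of `model_args`. A small illustration of the difference:

```python
model_args = {"temperature": 0.0, "trust_remote_code": False}

def collect_old(name: str):
    value = model_args.get(name, None)
    if value:  # 0.0 and False are falsy: the arg leaks through untouched
        model_args.pop(name)
    return value

def collect_new(name: str):
    value = model_args.get(name, None)
    if value is not None:  # only a genuinely absent arg is skipped
        model_args.pop(name)
    return value

collect_new("temperature")          # pops the key as intended
print("temperature" in model_args)  # False
```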
inspect_ai/model/_providers/bedrock.py CHANGED
@@ -236,15 +236,21 @@ class BedrockAPI(ModelAPI):
         self,
         model_name: str,
         base_url: str | None,
+        api_key: str | None = None,
         config: GenerateConfig = GenerateConfig(),
         **model_args: Any,
     ):
         super().__init__(
             model_name=model_name,
             base_url=model_base_url(base_url, "BEDROCK_BASE_URL"),
+            api_key=api_key,
+            api_key_vars=[],
             config=config,
         )
 
+        # save model_args
+        self.model_args = model_args
+
         # import aioboto3 on demand
         try:
             import aioboto3
@@ -263,6 +269,9 @@
 
     @override
     def max_tokens(self) -> int | None:
+        if "llama3-70" in self.model_name or "llama3-8" in self.model_name:
+            return 2048
+
         if "llama3" in self.model_name or "claude3" in self.model_name:
             return 4096
 
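Order matters here: the new, more specific `llama3-70`/`llama3-8` substring checks must precede the broader `llama3` check or they would never fire. A standalone restatement of the dispatch (the final `return None` is a placeholder for the branches outside the hunk):

```python
def bedrock_max_tokens(model_name: str) -> int | None:
    if "llama3-70" in model_name or "llama3-8" in model_name:
        return 2048
    if "llama3" in model_name or "claude3" in model_name:
        return 4096
    return None  # placeholder: remaining branches not shown in the hunk

print(bedrock_max_tokens("meta.llama3-8b-instruct-v1:0"))      # 2048
print(bedrock_max_tokens("meta.llama3-1-405b-instruct-v1:0"))  # 4096
```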
@@ -316,6 +325,7 @@
                     mode="adaptive",
                 ),
             ),
+            **self.model_args,
         ) as client:
             # Process the tools
             resolved_tools = converse_tools(tools)
@@ -658,6 +668,8 @@ def converse_image_type(type: str) -> ConverseImageFormat:
             return "png"
         case "image/webp":
             return "webp"
+        case "image/jpeg":
+            return "jpeg"
         case _:
             raise ValueError(
                 f"Image mime type {type} is not supported for Bedrock Converse models."
@@ -673,7 +685,11 @@ def converse_tools(tools: list[ToolInfo]) -> list[ConverseTool] | None:
         tool_spec = ConverseToolSpec(
             name=tool.name,
             description=tool.description,
-            inputSchema={"json": tool.parameters.model_dump(exclude_none=True)},
+            inputSchema={
+                "json": tool.parameters.model_dump(
+                    exclude_none=True, exclude={"additionalProperties"}
+                )
+            },
         )
         result.append(ConverseTool(toolSpec=tool_spec))
     return result
inspect_ai/model/_providers/hf.py CHANGED
@@ -64,7 +64,7 @@ class HuggingFaceAPI(ModelAPI):
         def collect_model_arg(name: str) -> Any | None:
             nonlocal model_args
             value = model_args.get(name, None)
-            if value:
+            if value is not None:
                 model_args.pop(name)
             return value
 
inspect_ai/model/_providers/openai.py CHANGED
@@ -18,6 +18,7 @@ from openai.types.chat import (
     ChatCompletionContentPartImageParam,
     ChatCompletionContentPartParam,
     ChatCompletionContentPartTextParam,
+    ChatCompletionDeveloperMessageParam,
     ChatCompletionMessage,
     ChatCompletionMessageParam,
     ChatCompletionMessageToolCallParam,
@@ -141,6 +142,18 @@ class OpenAIAPI(ModelAPI):
             **model_args,
         )
 
+    def is_o1(self) -> bool:
+        return self.model_name.startswith("o1")
+
+    def is_o1_full(self) -> bool:
+        return self.is_o1() and not self.is_o1_mini() and not self.is_o1_preview()
+
+    def is_o1_mini(self) -> bool:
+        return self.model_name.startswith("o1-mini")
+
+    def is_o1_preview(self) -> bool:
+        return self.model_name.startswith("o1-preview")
+
     async def generate(
         self,
         input: list[ChatMessage],
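These prefix predicates split the o1 family into the text-only `o1-preview`/`o1-mini` variants and the "full" o1 models. A conceptual restatement of how a model name is routed:

```python
def classify(model_name: str) -> str:
    # mirrors the is_o1* predicates above
    if model_name.startswith("o1-preview") or model_name.startswith("o1-mini"):
        return "text-only o1: routed to generate_o1()"
    if model_name.startswith("o1"):
        return "full o1: developer messages + reasoning_effort"
    return "non-o1: regular chat completions path"

print(classify("o1-2024-12-17"))  # full o1: developer messages + reasoning_effort
print(classify("o1-mini"))        # text-only o1: routed to generate_o1()
print(classify("gpt-4o"))         # non-o1: regular chat completions path
```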
@@ -148,8 +161,8 @@
         tool_choice: ToolChoice,
         config: GenerateConfig,
     ) -> ModelOutput | tuple[ModelOutput, ModelCall]:
-        # short-circuit to call o1- model
-        if self.model_name.startswith("o1-"):
+        # short-circuit to call o1- models that are text only
+        if self.is_o1_preview() or self.is_o1_mini():
             return await generate_o1(
                 client=self.client,
                 input=input,
@@ -179,7 +192,7 @@
 
         # prepare request (we do this so we can log the ModelCall)
         request = dict(
-            messages=await as_openai_chat_messages(input),
+            messages=await as_openai_chat_messages(input, self.is_o1_full()),
             tools=chat_tools(tools) if len(tools) > 0 else NOT_GIVEN,
             tool_choice=chat_tool_choice(tool_choice) if len(tools) > 0 else NOT_GIVEN,
             **self.completion_params(config, len(tools) > 0),
@@ -271,8 +284,10 @@
             params["logprobs"] = config.logprobs
         if config.top_logprobs is not None:
             params["top_logprobs"] = config.top_logprobs
-        if tools and config.parallel_tool_calls is not None:
+        if tools and config.parallel_tool_calls is not None and not self.is_o1():
             params["parallel_tool_calls"] = config.parallel_tool_calls
+        if config.reasoning_effort is not None and self.is_o1_full():
+            params["reasoning_effort"] = config.reasoning_effort
 
         return params
 
@@ -291,14 +306,23 @@
 
 
 async def as_openai_chat_messages(
-    messages: list[ChatMessage],
+    messages: list[ChatMessage], o1_full: bool
 ) -> list[ChatCompletionMessageParam]:
-    return [await openai_chat_message(message) for message in messages]
+    return [await openai_chat_message(message, o1_full) for message in messages]
 
 
-async def openai_chat_message(message: ChatMessage) -> ChatCompletionMessageParam:
+async def openai_chat_message(
+    message: ChatMessage, o1_full: bool
+) -> ChatCompletionMessageParam:
     if message.role == "system":
-        return ChatCompletionSystemMessageParam(role=message.role, content=message.text)
+        if o1_full:
+            return ChatCompletionDeveloperMessageParam(
+                role="developer", content=message.text
+            )
+        else:
+            return ChatCompletionSystemMessageParam(
+                role=message.role, content=message.text
+            )
     elif message.role == "user":
         return ChatCompletionUserMessageParam(
            role=message.role,
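For full o1 models the system prompt is re-labeled with OpenAI's `developer` role; everything else keeps `system`. The two wire payloads differ only in the role field, sketched here as plain dicts:

```python
prompt = "You are a careful assistant."
developer_msg = {"role": "developer", "content": prompt}  # full o1 models
system_msg = {"role": "system", "content": prompt}        # all other models
```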
inspect_ai/model/_providers/providers.py CHANGED
@@ -242,7 +242,7 @@ def mockllm() -> type[ModelAPI]:
 def validate_openai_client(feature: str) -> None:
     FEATURE = feature
     PACKAGE = "openai"
-    MIN_VERSION = "1.45.0"
+    MIN_VERSION = "1.58.1"
 
     # verify we have the package
     try:
inspect_ai/model/_providers/vllm.py CHANGED
@@ -75,7 +75,7 @@ class VLLMAPI(ModelAPI):
         def collect_model_arg(name: str) -> Any | None:
             nonlocal model_args
             value = model_args.get(name, None)
-            if value:
+            if value is not None:
                 model_args.pop(name)
             return value
 
inspect_ai/util/_sandbox/context.py CHANGED
@@ -109,7 +109,7 @@ def raise_no_sandbox() -> NoReturn:
 
 
 async def init_sandbox_environments_sample(
-    type: str,
+    sandboxenv_type: type[SandboxEnvironment],
     task_name: str,
     config: SandboxEnvironmentConfigType | None,
     files: dict[str, bytes],
@@ -117,7 +117,6 @@ async def init_sandbox_environments_sample(
     metadata: dict[str, Any],
 ) -> dict[str, SandboxEnvironment]:
     # get setup and cleanup functions
-    sandboxenv_type = registry_find_sandboxenv(type)
     sample_init = cast(SampleInit, getattr(sandboxenv_type, "sample_init"))
     sample_cleanup = cast(SampleCleanup, getattr(sandboxenv_type, "sample_cleanup"))
 
inspect_ai/util/_sandbox/docker/config.py CHANGED
@@ -2,8 +2,6 @@ import os
 from logging import getLogger
 from pathlib import Path
 
-import aiofiles
-
 logger = getLogger(__name__)
 
 
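This removed import, together with the `async` → sync conversions that follow here and in `docker.py` and `local.py`, tracks the removal of the `aiofiles` dependency altogether; see the `Requires-Dist: aiofiles` lines dropped from METADATA at the end of this diff.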
@@ -17,7 +15,7 @@ CONFIG_FILES = [
 DOCKERFILE = "Dockerfile"
 
 
-async def resolve_compose_file(parent: str = "") -> str:
+def resolve_compose_file(parent: str = "") -> str:
     # existing compose file provides all the config we need
     compose = find_compose_file(parent)
     if compose is not None:
@@ -29,11 +27,11 @@ async def resolve_compose_file(parent: str = "") -> str:
 
     # dockerfile just needs a compose.yaml synthesized
     elif has_dockerfile(parent):
-        return await auto_compose_file(COMPOSE_DOCKERFILE_YAML, parent)
+        return auto_compose_file(COMPOSE_DOCKERFILE_YAML, parent)
 
     # otherwise provide a generic python container
     else:
-        return await auto_compose_file(COMPOSE_GENERIC_YAML, parent)
+        return auto_compose_file(COMPOSE_GENERIC_YAML, parent)
 
 
 def find_compose_file(parent: str = "") -> str | None:
@@ -59,9 +57,9 @@ def is_auto_compose_file(file: str) -> bool:
     return os.path.basename(file) == AUTO_COMPOSE_YAML
 
 
-async def ensure_auto_compose_file(file: str | None) -> None:
+def ensure_auto_compose_file(file: str | None) -> None:
     if file is not None and is_auto_compose_file(file) and not os.path.exists(file):
-        await resolve_compose_file(os.path.dirname(file))
+        resolve_compose_file(os.path.dirname(file))
 
 
 def safe_cleanup_auto_compose(file: str | None) -> None:
@@ -100,8 +98,8 @@ services:
 """
 
 
-async def auto_compose_file(contents: str, parent: str = "") -> str:
+def auto_compose_file(contents: str, parent: str = "") -> str:
     path = os.path.join(parent, AUTO_COMPOSE_YAML)
-    async with aiofiles.open(path, "w", encoding="utf-8") as f:
-        await f.write(contents)
+    with open(path, "w", encoding="utf-8") as f:
+        f.write(contents)
     return Path(path).resolve().as_posix()
inspect_ai/util/_sandbox/docker/docker.py CHANGED
@@ -5,7 +5,6 @@ from logging import getLogger
 from pathlib import Path, PurePosixPath
 from typing import Literal, Union, cast, overload
 
-import aiofiles
 from typing_extensions import override
 
 from inspect_ai.util._subprocess import ExecResult
@@ -54,6 +53,11 @@ class DockerSandboxEnvironment(SandboxEnvironment):
     def config_files(cls) -> list[str]:
         return CONFIG_FILES + [DOCKERFILE]
 
+    @classmethod
+    def default_concurrency(cls) -> int | None:
+        count = os.cpu_count() or 1
+        return 2 * count
+
     @classmethod
     async def task_init(
         cls, task_name: str, config: SandboxEnvironmentConfigType | None
@@ -403,11 +407,11 @@ class DockerSandboxEnvironment(SandboxEnvironment):
 
         # read and return w/ appropriate encoding
         if text:
-            async with aiofiles.open(dest_file, "r", encoding="utf-8") as f:
-                return await f.read()
+            with open(dest_file, "r", encoding="utf-8") as f:
+                return f.read()
         else:
-            async with aiofiles.open(dest_file, "rb") as f:
-                return await f.read()
+            with open(dest_file, "rb") as f:
+                return f.read()
 
     @override
     async def connection(self) -> SandboxConnection:
inspect_ai/util/_sandbox/docker/util.py CHANGED
@@ -41,7 +41,7 @@ class ComposeProject:
 
         # if its a Dockerfile, then config is the auto-generated .compose.yaml
         if config_path and is_dockerfile(config_path.name):
-            config = await auto_compose_file(
+            config = auto_compose_file(
                 COMPOSE_DOCKERFILE_YAML, config_path.parent.as_posix()
             )
 
@@ -51,12 +51,12 @@ class ComposeProject:
 
         # no config passed, look for 'auto-config' (compose.yaml, Dockerfile, etc.)
         else:
-            config = await resolve_compose_file()
+            config = resolve_compose_file()
 
         # this could be a cleanup where docker has tracked a .compose.yaml file
         # as part of its ConfigFiles and passed it back to us -- we in the
         # meantime have cleaned it up so we re-create it here as required
-        await ensure_auto_compose_file(config)
+        ensure_auto_compose_file(config)
 
         # return project
         return ComposeProject(name, config, env)
inspect_ai/util/_sandbox/environment.py CHANGED
@@ -53,6 +53,11 @@ class SandboxEnvironment(abc.ABC):
         """Standard config files for this provider (used for automatic discovery)"""
         return []
 
+    @classmethod
+    def default_concurrency(cls) -> int | None:
+        """Default max_sandboxes for this provider (`None` means no maximum)"""
+        return None
+
     @classmethod
     async def task_init(
         cls, task_name: str, config: SandboxEnvironmentConfigType | None
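The base class now lets a provider advertise a default cap on concurrent sandboxes; the Docker override above returns twice the CPU count. A sketch of a custom provider opting in (a fragment only, with the abstract `exec`/`read_file`/`write_file` methods omitted; `ThrottledSandbox` is a hypothetical name):

```python
from inspect_ai.util import SandboxEnvironment  # assumption: public re-export

class ThrottledSandbox(SandboxEnvironment):
    @classmethod
    def default_concurrency(cls) -> int | None:
        # at most 4 sandboxes run at once unless max_sandboxes overrides it
        return 4
```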
@@ -143,7 +148,7 @@
         The current working directory for execution will be the per-sample
         filesystem context.
 
-        Each output stream (stdout and stderr) is limited to 1 MiB. If exceeded, an
+        Each output stream (stdout and stderr) is limited to 10 MiB. If exceeded, an
         `OutputLimitExceededError` will be raised.
 
         Args:
@@ -164,7 +169,7 @@
           PermissionError: If the user does not have
             permission to execute the command.
           OutputLimitExceededError: If an output stream
-            exceeds the 1 MiB limit.
+            exceeds the 10 MiB limit.
         """
         ...
 
inspect_ai/util/_sandbox/limits.py CHANGED
@@ -29,7 +29,7 @@ def verify_exec_result_size(exec_result: ExecResult[str]) -> None:
     """Verify the size of the output streams in an `ExecResult`.
 
     Raises:
-      OutputLimitExceededError: If an output stream exceeds the 1 MiB limit.
+      OutputLimitExceededError: If an output stream exceeds the limit.
     """
     limit = SandboxEnvironmentLimits.MAX_EXEC_OUTPUT_SIZE
     stdout_truncated = truncate_string_to_bytes(exec_result.stdout, limit)
inspect_ai/util/_sandbox/local.py CHANGED
@@ -3,7 +3,6 @@ import warnings
 from pathlib import Path
 from typing import Literal, Union, cast, overload
 
-import aiofiles
 from typing_extensions import override
 
 from .._subprocess import ExecResult, subprocess
@@ -85,11 +84,11 @@ class LocalSandboxEnvironment(SandboxEnvironment):
         Path(file).parent.mkdir(parents=True, exist_ok=True)
 
         if isinstance(contents, str):
-            async with aiofiles.open(file, "w", encoding="utf-8") as f:
-                await f.write(contents)
+            with open(file, "w", encoding="utf-8") as f:
+                f.write(contents)
         else:
-            async with aiofiles.open(file, "wb") as f:
-                await f.write(contents)
+            with open(file, "wb") as f:
+                f.write(contents)
 
     @overload
     async def read_file(self, file: str, text: Literal[True] = True) -> str: ...
@@ -102,11 +101,11 @@ class LocalSandboxEnvironment(SandboxEnvironment):
         file = self._resolve_file(file)
         verify_read_file_size(file)
         if text:
-            async with aiofiles.open(file, "r", encoding="utf-8") as f:
-                return await f.read()
+            with open(file, "r", encoding="utf-8") as f:
+                return f.read()
         else:
-            async with aiofiles.open(file, "rb") as f:
-                return await f.read()
+            with open(file, "rb") as f:
+                return f.read()
 
     def _resolve_file(self, file: str) -> str:
         path = Path(file)
{inspect_ai-0.3.52.dist-info → inspect_ai-0.3.53.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: inspect_ai
-Version: 0.3.52
+Version: 0.3.53
 Summary: Framework for large language model evaluations
 Author: UK AI Safety Institute
 License: MIT License
@@ -20,7 +20,6 @@ Classifier: Operating System :: OS Independent
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: aiofiles
 Requires-Dist: aiohttp>=3.9.0
 Requires-Dist: anyio>=4.4.0
 Requires-Dist: beautifulsoup4
@@ -71,7 +70,6 @@ Requires-Dist: pytest-xdist; extra == "dev"
 Requires-Dist: ruff==0.8.3; extra == "dev"
 Requires-Dist: textual-dev>=0.86.2; extra == "dev"
 Requires-Dist: types-PyYAML; extra == "dev"
-Requires-Dist: types-aiofiles; extra == "dev"
 Requires-Dist: types-beautifulsoup4; extra == "dev"
 Requires-Dist: types-aioboto3; extra == "dev"
 Requires-Dist: types-boto3; extra == "dev"