inspect-ai 0.3.52__py3-none-any.whl → 0.3.53__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +29 -0
- inspect_ai/_display/core/progress.py +9 -3
- inspect_ai/_display/core/results.py +8 -4
- inspect_ai/_display/textual/widgets/task_detail.py +3 -0
- inspect_ai/_display/textual/widgets/tasks.py +86 -5
- inspect_ai/_eval/eval.py +16 -0
- inspect_ai/_eval/evalset.py +4 -0
- inspect_ai/_eval/registry.py +2 -2
- inspect_ai/_eval/task/results.py +22 -4
- inspect_ai/_eval/task/run.py +14 -10
- inspect_ai/_eval/task/sandbox.py +72 -43
- inspect_ai/_eval/task/task.py +4 -0
- inspect_ai/_eval/task/util.py +2 -0
- inspect_ai/_view/www/App.css +13 -0
- inspect_ai/_view/www/dist/assets/index.css +13 -0
- inspect_ai/_view/www/dist/assets/index.js +80 -43
- inspect_ai/_view/www/src/App.mjs +31 -6
- inspect_ai/_view/www/src/Types.mjs +6 -0
- inspect_ai/_view/www/src/components/JsonPanel.mjs +11 -17
- inspect_ai/_view/www/src/components/MessageContent.mjs +9 -2
- inspect_ai/_view/www/src/components/Tools.mjs +46 -18
- inspect_ai/_view/www/src/navbar/Navbar.mjs +12 -0
- inspect_ai/_view/www/src/samples/SampleList.mjs +2 -2
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +2 -2
- inspect_ai/log/_log.py +3 -0
- inspect_ai/log/_recorders/eval.py +8 -7
- inspect_ai/model/_generate_config.py +6 -0
- inspect_ai/model/_providers/azureai.py +1 -1
- inspect_ai/model/_providers/bedrock.py +17 -1
- inspect_ai/model/_providers/hf.py +1 -1
- inspect_ai/model/_providers/openai.py +32 -8
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/model/_providers/vllm.py +1 -1
- inspect_ai/util/_sandbox/context.py +1 -2
- inspect_ai/util/_sandbox/docker/config.py +8 -10
- inspect_ai/util/_sandbox/docker/docker.py +9 -5
- inspect_ai/util/_sandbox/docker/util.py +3 -3
- inspect_ai/util/_sandbox/environment.py +7 -2
- inspect_ai/util/_sandbox/limits.py +1 -1
- inspect_ai/util/_sandbox/local.py +8 -9
- {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.53.dist-info}/METADATA +1 -3
- {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.53.dist-info}/RECORD +46 -46
- {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.53.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.53.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.53.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.53.dist-info}/top_level.txt +0 -0
@@ -63,26 +63,12 @@ export const ToolCallView = ({
|
|
63
63
|
output,
|
64
64
|
mode,
|
65
65
|
}) => {
|
66
|
-
const icon =
|
67
|
-
mode === "compact"
|
68
|
-
? ""
|
69
|
-
: html`<i
|
70
|
-
class="bi bi-tools"
|
71
|
-
style=${{
|
72
|
-
marginRight: "0.2rem",
|
73
|
-
opacity: "0.4",
|
74
|
-
}}
|
75
|
-
></i>`;
|
76
|
-
const codeIndent = mode === "compact" ? "" : "";
|
77
66
|
return html`<div>
|
78
|
-
${
|
79
|
-
|
80
|
-
? html`<code style=${{ fontSize: FontSize.small }}
|
81
|
-
>${view?.title || functionCall}</code
|
82
|
-
>`
|
67
|
+
${mode !== "compact" && (!view || view.title)
|
68
|
+
? html`<${ToolTitle} title=${view?.title || functionCall} />`
|
83
69
|
: ""}
|
84
70
|
<div>
|
85
|
-
<div
|
71
|
+
<div>
|
86
72
|
<${ToolInput}
|
87
73
|
type=${inputType}
|
88
74
|
contents=${input}
|
@@ -92,7 +78,7 @@ export const ToolCallView = ({
|
|
92
78
|
${output
|
93
79
|
? html`
|
94
80
|
<${ExpandablePanel} collapse=${true} border=${true} lines=${15}>
|
95
|
-
<${MessageContent} contents=${output} />
|
81
|
+
<${MessageContent} contents=${normalizeContent(output)} />
|
96
82
|
</${ExpandablePanel}>`
|
97
83
|
: ""}
|
98
84
|
</div>
|
@@ -100,6 +86,48 @@ export const ToolCallView = ({
|
|
100
86
|
</div>`;
|
101
87
|
};
|
102
88
|
|
89
|
+
/**
|
90
|
+
* Renders the ToolCallView component.
|
91
|
+
*
|
92
|
+
* @param {Object} props - The parameters for the component.
|
93
|
+
* @param {string} props.title - The title for the tool call
|
94
|
+
* @returns {import("preact").JSX.Element} The SampleTranscript component.
|
95
|
+
*/
|
96
|
+
const ToolTitle = ({ title }) => {
|
97
|
+
return html` <i
|
98
|
+
class="bi bi-tools"
|
99
|
+
style=${{
|
100
|
+
marginRight: "0.2rem",
|
101
|
+
opacity: "0.4",
|
102
|
+
}}
|
103
|
+
></i>
|
104
|
+
<code style=${{ fontSize: FontSize.small }}>${title}</code>`;
|
105
|
+
};
|
106
|
+
|
107
|
+
/**
|
108
|
+
* Renders the ToolCallView component.
|
109
|
+
*
|
110
|
+
* @param {string | number | boolean | (import("../types/log").ContentText | import("../types/log").ContentImage)[]} output - The tool output
|
111
|
+
* @returns {(import("../Types.mjs").ContentTool | import("../types/log").ContentText | import("../types/log").ContentImage)[]} The SampleTranscript component.
|
112
|
+
*/
|
113
|
+
const normalizeContent = (output) => {
|
114
|
+
if (Array.isArray(output)) {
|
115
|
+
return output;
|
116
|
+
} else {
|
117
|
+
return [
|
118
|
+
{
|
119
|
+
type: "tool",
|
120
|
+
content: [
|
121
|
+
{
|
122
|
+
type: "text",
|
123
|
+
text: String(output),
|
124
|
+
},
|
125
|
+
],
|
126
|
+
},
|
127
|
+
];
|
128
|
+
}
|
129
|
+
};
|
130
|
+
|
103
131
|
/**
|
104
132
|
* Renders the ToolInput component.
|
105
133
|
*
|
@@ -255,6 +255,8 @@ const ResultsPanel = ({ results }) => {
|
|
255
255
|
justifyContent: "end",
|
256
256
|
height: "100%",
|
257
257
|
alignItems: "center",
|
258
|
+
maxHeight: "15em",
|
259
|
+
overflow: "scroll",
|
258
260
|
}}
|
259
261
|
>
|
260
262
|
${metrics.map((metric, i) => {
|
@@ -273,6 +275,8 @@ const ResultsPanel = ({ results }) => {
|
|
273
275
|
marginTop: "0.2rem",
|
274
276
|
paddingBottom: "0.4rem",
|
275
277
|
rowGap: "1em",
|
278
|
+
maxHeight: "15em",
|
279
|
+
overflow: "scroll",
|
276
280
|
}}
|
277
281
|
>
|
278
282
|
${results?.scores?.map((score, index) => {
|
@@ -285,6 +289,14 @@ const ResultsPanel = ({ results }) => {
|
|
285
289
|
}
|
286
290
|
};
|
287
291
|
|
292
|
+
/** Renders a Vertial Metric
|
293
|
+
*
|
294
|
+
* @param {Object} props - The parameters for the component.
|
295
|
+
* @param {import("../types/log").EvalMetric} props.metric - The metric
|
296
|
+
* @param {boolean} props.isFirst - Whether this is the first metric
|
297
|
+
*
|
298
|
+
* @returns {import("preact").JSX.Element} The TranscriptView component.
|
299
|
+
*/
|
288
300
|
const VerticalMetric = ({ metric, isFirst }) => {
|
289
301
|
const reducer_component = metric.reducer
|
290
302
|
? html` <div
|
@@ -145,7 +145,7 @@ export const SampleList = (props) => {
|
|
145
145
|
);
|
146
146
|
|
147
147
|
const listStyle = { ...style, flex: "1", overflowY: "auto", outline: "none" };
|
148
|
-
const { limit, answer } = gridColumns(sampleDescriptor);
|
148
|
+
const { limit, answer, target } = gridColumns(sampleDescriptor);
|
149
149
|
|
150
150
|
const headerRow = html`<div
|
151
151
|
style=${{
|
@@ -161,7 +161,7 @@ export const SampleList = (props) => {
|
|
161
161
|
>
|
162
162
|
<div>Id</div>
|
163
163
|
<div>Input</div>
|
164
|
-
<div
|
164
|
+
<div>${target !== "0" ? "Target" : ""}</div>
|
165
165
|
<div>${answer !== "0" ? "Answer" : ""}</div>
|
166
166
|
<div>${limit !== "0" ? "Limit" : ""}</div>
|
167
167
|
<div style=${{ justifySelf: "center" }}>Score</div>
|
@@ -29,10 +29,10 @@ export const ToolEventView = ({ id, event, style, depth }) => {
|
|
29
29
|
return e.event === "approval";
|
30
30
|
});
|
31
31
|
|
32
|
-
const title = `Tool: ${event.function}`;
|
32
|
+
const title = `Tool: ${event.view?.title || event.function}`;
|
33
33
|
return html`
|
34
34
|
<${EventPanel} id=${id} title="${title}" subTitle=${formatDateTime(new Date(event.timestamp))} icon=${ApplicationIcons.solvers.use_tools} style=${style}>
|
35
|
-
<div name="Summary" style=${{ margin: "0.5em 0" }}>
|
35
|
+
<div name="Summary" style=${{ margin: "0.5em 0", width: "100%" }}>
|
36
36
|
<${ToolCallView}
|
37
37
|
functionCall=${functionCall}
|
38
38
|
input=${input}
|
inspect_ai/log/_log.py
CHANGED
@@ -79,6 +79,9 @@ class EvalConfig(BaseModel):
|
|
79
79
|
max_subprocesses: int | None = Field(default=None)
|
80
80
|
"""Maximum number of subprocesses to run concurrently."""
|
81
81
|
|
82
|
+
max_sandboxes: int | None = Field(default=None)
|
83
|
+
"""Maximum number of sandboxes to run concurrently."""
|
84
|
+
|
82
85
|
sandbox_cleanup: bool | None = Field(default=None)
|
83
86
|
"""Cleanup sandbox environments after task completes."""
|
84
87
|
|
@@ -362,13 +362,14 @@ class ZipLogFile:
|
|
362
362
|
f"Error occurred during async write to {self._file}: {ex}. Falling back to sync write."
|
363
363
|
)
|
364
364
|
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
365
|
+
try:
|
366
|
+
# write sync if we need to
|
367
|
+
if not written:
|
368
|
+
with file(self._file, "wb") as f:
|
369
|
+
f.write(log_bytes)
|
370
|
+
finally:
|
371
|
+
# re-open zip file w/ self.temp_file pointer at end
|
372
|
+
self._open()
|
372
373
|
|
373
374
|
async def close(self) -> EvalLog:
|
374
375
|
async with self._lock:
|
@@ -72,6 +72,9 @@ class GenerateConfigArgs(TypedDict, total=False):
|
|
72
72
|
cache_prompt: Literal["auto"] | bool | None
|
73
73
|
"""Whether to cache the prompt prefix. Defaults to "auto", which will enable caching for requests with tools. Anthropic only."""
|
74
74
|
|
75
|
+
reasoning_effort: Literal["low", "medium", "high"] | None
|
76
|
+
"""Constrains effort on reasoning for reasoning models. Open AI o1 models only."""
|
77
|
+
|
75
78
|
|
76
79
|
class GenerateConfig(BaseModel):
|
77
80
|
"""Base class for model generation configs."""
|
@@ -139,6 +142,9 @@ class GenerateConfig(BaseModel):
|
|
139
142
|
cache_prompt: Literal["auto"] | bool | None = Field(default=None)
|
140
143
|
"""Whether to cache the prompt prefix. Defaults to "auto", which will enable caching for requests with tools. Anthropic only."""
|
141
144
|
|
145
|
+
reasoning_effort: Literal["low", "medium", "high"] | None = Field(default=None)
|
146
|
+
"""Constrains effort on reasoning for reasoning models. Open AI o1 models only."""
|
147
|
+
|
142
148
|
def merge(
|
143
149
|
self, other: Union["GenerateConfig", GenerateConfigArgs]
|
144
150
|
) -> "GenerateConfig":
|
@@ -236,15 +236,21 @@ class BedrockAPI(ModelAPI):
|
|
236
236
|
self,
|
237
237
|
model_name: str,
|
238
238
|
base_url: str | None,
|
239
|
+
api_key: str | None = None,
|
239
240
|
config: GenerateConfig = GenerateConfig(),
|
240
241
|
**model_args: Any,
|
241
242
|
):
|
242
243
|
super().__init__(
|
243
244
|
model_name=model_name,
|
244
245
|
base_url=model_base_url(base_url, "BEDROCK_BASE_URL"),
|
246
|
+
api_key=api_key,
|
247
|
+
api_key_vars=[],
|
245
248
|
config=config,
|
246
249
|
)
|
247
250
|
|
251
|
+
# save model_args
|
252
|
+
self.model_args = model_args
|
253
|
+
|
248
254
|
# import aioboto3 on demand
|
249
255
|
try:
|
250
256
|
import aioboto3
|
@@ -263,6 +269,9 @@ class BedrockAPI(ModelAPI):
|
|
263
269
|
|
264
270
|
@override
|
265
271
|
def max_tokens(self) -> int | None:
|
272
|
+
if "llama3-70" in self.model_name or "llama3-8" in self.model_name:
|
273
|
+
return 2048
|
274
|
+
|
266
275
|
if "llama3" in self.model_name or "claude3" in self.model_name:
|
267
276
|
return 4096
|
268
277
|
|
@@ -316,6 +325,7 @@ class BedrockAPI(ModelAPI):
|
|
316
325
|
mode="adaptive",
|
317
326
|
),
|
318
327
|
),
|
328
|
+
**self.model_args,
|
319
329
|
) as client:
|
320
330
|
# Process the tools
|
321
331
|
resolved_tools = converse_tools(tools)
|
@@ -658,6 +668,8 @@ def converse_image_type(type: str) -> ConverseImageFormat:
|
|
658
668
|
return "png"
|
659
669
|
case "image/webp":
|
660
670
|
return "webp"
|
671
|
+
case "image/jpeg":
|
672
|
+
return "jpeg"
|
661
673
|
case _:
|
662
674
|
raise ValueError(
|
663
675
|
f"Image mime type {type} is not supported for Bedrock Converse models."
|
@@ -673,7 +685,11 @@ def converse_tools(tools: list[ToolInfo]) -> list[ConverseTool] | None:
|
|
673
685
|
tool_spec = ConverseToolSpec(
|
674
686
|
name=tool.name,
|
675
687
|
description=tool.description,
|
676
|
-
inputSchema={
|
688
|
+
inputSchema={
|
689
|
+
"json": tool.parameters.model_dump(
|
690
|
+
exclude_none=True, exclude={"additionalProperties"}
|
691
|
+
)
|
692
|
+
},
|
677
693
|
)
|
678
694
|
result.append(ConverseTool(toolSpec=tool_spec))
|
679
695
|
return result
|
@@ -18,6 +18,7 @@ from openai.types.chat import (
|
|
18
18
|
ChatCompletionContentPartImageParam,
|
19
19
|
ChatCompletionContentPartParam,
|
20
20
|
ChatCompletionContentPartTextParam,
|
21
|
+
ChatCompletionDeveloperMessageParam,
|
21
22
|
ChatCompletionMessage,
|
22
23
|
ChatCompletionMessageParam,
|
23
24
|
ChatCompletionMessageToolCallParam,
|
@@ -141,6 +142,18 @@ class OpenAIAPI(ModelAPI):
|
|
141
142
|
**model_args,
|
142
143
|
)
|
143
144
|
|
145
|
+
def is_o1(self) -> bool:
|
146
|
+
return self.model_name.startswith("o1")
|
147
|
+
|
148
|
+
def is_o1_full(self) -> bool:
|
149
|
+
return self.is_o1() and not self.is_o1_mini() and not self.is_o1_preview()
|
150
|
+
|
151
|
+
def is_o1_mini(self) -> bool:
|
152
|
+
return self.model_name.startswith("o1-mini")
|
153
|
+
|
154
|
+
def is_o1_preview(self) -> bool:
|
155
|
+
return self.model_name.startswith("o1-preview")
|
156
|
+
|
144
157
|
async def generate(
|
145
158
|
self,
|
146
159
|
input: list[ChatMessage],
|
@@ -148,8 +161,8 @@ class OpenAIAPI(ModelAPI):
|
|
148
161
|
tool_choice: ToolChoice,
|
149
162
|
config: GenerateConfig,
|
150
163
|
) -> ModelOutput | tuple[ModelOutput, ModelCall]:
|
151
|
-
# short-circuit to call o1-
|
152
|
-
if self.
|
164
|
+
# short-circuit to call o1- models that are text only
|
165
|
+
if self.is_o1_preview() or self.is_o1_mini():
|
153
166
|
return await generate_o1(
|
154
167
|
client=self.client,
|
155
168
|
input=input,
|
@@ -179,7 +192,7 @@ class OpenAIAPI(ModelAPI):
|
|
179
192
|
|
180
193
|
# prepare request (we do this so we can log the ModelCall)
|
181
194
|
request = dict(
|
182
|
-
messages=await as_openai_chat_messages(input),
|
195
|
+
messages=await as_openai_chat_messages(input, self.is_o1_full()),
|
183
196
|
tools=chat_tools(tools) if len(tools) > 0 else NOT_GIVEN,
|
184
197
|
tool_choice=chat_tool_choice(tool_choice) if len(tools) > 0 else NOT_GIVEN,
|
185
198
|
**self.completion_params(config, len(tools) > 0),
|
@@ -271,8 +284,10 @@ class OpenAIAPI(ModelAPI):
|
|
271
284
|
params["logprobs"] = config.logprobs
|
272
285
|
if config.top_logprobs is not None:
|
273
286
|
params["top_logprobs"] = config.top_logprobs
|
274
|
-
if tools and config.parallel_tool_calls is not None:
|
287
|
+
if tools and config.parallel_tool_calls is not None and not self.is_o1():
|
275
288
|
params["parallel_tool_calls"] = config.parallel_tool_calls
|
289
|
+
if config.reasoning_effort is not None and self.is_o1_full():
|
290
|
+
params["reasoning_effort"] = config.reasoning_effort
|
276
291
|
|
277
292
|
return params
|
278
293
|
|
@@ -291,14 +306,23 @@ class OpenAIAPI(ModelAPI):
|
|
291
306
|
|
292
307
|
|
293
308
|
async def as_openai_chat_messages(
|
294
|
-
messages: list[ChatMessage],
|
309
|
+
messages: list[ChatMessage], o1_full: bool
|
295
310
|
) -> list[ChatCompletionMessageParam]:
|
296
|
-
return [await openai_chat_message(message) for message in messages]
|
311
|
+
return [await openai_chat_message(message, o1_full) for message in messages]
|
297
312
|
|
298
313
|
|
299
|
-
async def openai_chat_message(
|
314
|
+
async def openai_chat_message(
|
315
|
+
message: ChatMessage, o1_full: bool
|
316
|
+
) -> ChatCompletionMessageParam:
|
300
317
|
if message.role == "system":
|
301
|
-
|
318
|
+
if o1_full:
|
319
|
+
return ChatCompletionDeveloperMessageParam(
|
320
|
+
role="developer", content=message.text
|
321
|
+
)
|
322
|
+
else:
|
323
|
+
return ChatCompletionSystemMessageParam(
|
324
|
+
role=message.role, content=message.text
|
325
|
+
)
|
302
326
|
elif message.role == "user":
|
303
327
|
return ChatCompletionUserMessageParam(
|
304
328
|
role=message.role,
|
@@ -109,7 +109,7 @@ def raise_no_sandbox() -> NoReturn:
|
|
109
109
|
|
110
110
|
|
111
111
|
async def init_sandbox_environments_sample(
|
112
|
-
|
112
|
+
sandboxenv_type: type[SandboxEnvironment],
|
113
113
|
task_name: str,
|
114
114
|
config: SandboxEnvironmentConfigType | None,
|
115
115
|
files: dict[str, bytes],
|
@@ -117,7 +117,6 @@ async def init_sandbox_environments_sample(
|
|
117
117
|
metadata: dict[str, Any],
|
118
118
|
) -> dict[str, SandboxEnvironment]:
|
119
119
|
# get setup and cleanup functions
|
120
|
-
sandboxenv_type = registry_find_sandboxenv(type)
|
121
120
|
sample_init = cast(SampleInit, getattr(sandboxenv_type, "sample_init"))
|
122
121
|
sample_cleanup = cast(SampleCleanup, getattr(sandboxenv_type, "sample_cleanup"))
|
123
122
|
|
@@ -2,8 +2,6 @@ import os
|
|
2
2
|
from logging import getLogger
|
3
3
|
from pathlib import Path
|
4
4
|
|
5
|
-
import aiofiles
|
6
|
-
|
7
5
|
logger = getLogger(__name__)
|
8
6
|
|
9
7
|
|
@@ -17,7 +15,7 @@ CONFIG_FILES = [
|
|
17
15
|
DOCKERFILE = "Dockerfile"
|
18
16
|
|
19
17
|
|
20
|
-
|
18
|
+
def resolve_compose_file(parent: str = "") -> str:
|
21
19
|
# existing compose file provides all the config we need
|
22
20
|
compose = find_compose_file(parent)
|
23
21
|
if compose is not None:
|
@@ -29,11 +27,11 @@ async def resolve_compose_file(parent: str = "") -> str:
|
|
29
27
|
|
30
28
|
# dockerfile just needs a compose.yaml synthesized
|
31
29
|
elif has_dockerfile(parent):
|
32
|
-
return
|
30
|
+
return auto_compose_file(COMPOSE_DOCKERFILE_YAML, parent)
|
33
31
|
|
34
32
|
# otherwise provide a generic python container
|
35
33
|
else:
|
36
|
-
return
|
34
|
+
return auto_compose_file(COMPOSE_GENERIC_YAML, parent)
|
37
35
|
|
38
36
|
|
39
37
|
def find_compose_file(parent: str = "") -> str | None:
|
@@ -59,9 +57,9 @@ def is_auto_compose_file(file: str) -> bool:
|
|
59
57
|
return os.path.basename(file) == AUTO_COMPOSE_YAML
|
60
58
|
|
61
59
|
|
62
|
-
|
60
|
+
def ensure_auto_compose_file(file: str | None) -> None:
|
63
61
|
if file is not None and is_auto_compose_file(file) and not os.path.exists(file):
|
64
|
-
|
62
|
+
resolve_compose_file(os.path.dirname(file))
|
65
63
|
|
66
64
|
|
67
65
|
def safe_cleanup_auto_compose(file: str | None) -> None:
|
@@ -100,8 +98,8 @@ services:
|
|
100
98
|
"""
|
101
99
|
|
102
100
|
|
103
|
-
|
101
|
+
def auto_compose_file(contents: str, parent: str = "") -> str:
|
104
102
|
path = os.path.join(parent, AUTO_COMPOSE_YAML)
|
105
|
-
|
106
|
-
|
103
|
+
with open(path, "w", encoding="utf-8") as f:
|
104
|
+
f.write(contents)
|
107
105
|
return Path(path).resolve().as_posix()
|
@@ -5,7 +5,6 @@ from logging import getLogger
|
|
5
5
|
from pathlib import Path, PurePosixPath
|
6
6
|
from typing import Literal, Union, cast, overload
|
7
7
|
|
8
|
-
import aiofiles
|
9
8
|
from typing_extensions import override
|
10
9
|
|
11
10
|
from inspect_ai.util._subprocess import ExecResult
|
@@ -54,6 +53,11 @@ class DockerSandboxEnvironment(SandboxEnvironment):
|
|
54
53
|
def config_files(cls) -> list[str]:
|
55
54
|
return CONFIG_FILES + [DOCKERFILE]
|
56
55
|
|
56
|
+
@classmethod
|
57
|
+
def default_concurrency(cls) -> int | None:
|
58
|
+
count = os.cpu_count() or 1
|
59
|
+
return 2 * count
|
60
|
+
|
57
61
|
@classmethod
|
58
62
|
async def task_init(
|
59
63
|
cls, task_name: str, config: SandboxEnvironmentConfigType | None
|
@@ -403,11 +407,11 @@ class DockerSandboxEnvironment(SandboxEnvironment):
|
|
403
407
|
|
404
408
|
# read and return w/ appropriate encoding
|
405
409
|
if text:
|
406
|
-
|
407
|
-
return
|
410
|
+
with open(dest_file, "r", encoding="utf-8") as f:
|
411
|
+
return f.read()
|
408
412
|
else:
|
409
|
-
|
410
|
-
return
|
413
|
+
with open(dest_file, "rb") as f:
|
414
|
+
return f.read()
|
411
415
|
|
412
416
|
@override
|
413
417
|
async def connection(self) -> SandboxConnection:
|
@@ -41,7 +41,7 @@ class ComposeProject:
|
|
41
41
|
|
42
42
|
# if its a Dockerfile, then config is the auto-generated .compose.yaml
|
43
43
|
if config_path and is_dockerfile(config_path.name):
|
44
|
-
config =
|
44
|
+
config = auto_compose_file(
|
45
45
|
COMPOSE_DOCKERFILE_YAML, config_path.parent.as_posix()
|
46
46
|
)
|
47
47
|
|
@@ -51,12 +51,12 @@ class ComposeProject:
|
|
51
51
|
|
52
52
|
# no config passed, look for 'auto-config' (compose.yaml, Dockerfile, etc.)
|
53
53
|
else:
|
54
|
-
config =
|
54
|
+
config = resolve_compose_file()
|
55
55
|
|
56
56
|
# this could be a cleanup where docker has tracked a .compose.yaml file
|
57
57
|
# as part of its ConfigFiles and passed it back to us -- we in the
|
58
58
|
# meantime have cleaned it up so we re-create it here as required
|
59
|
-
|
59
|
+
ensure_auto_compose_file(config)
|
60
60
|
|
61
61
|
# return project
|
62
62
|
return ComposeProject(name, config, env)
|
@@ -53,6 +53,11 @@ class SandboxEnvironment(abc.ABC):
|
|
53
53
|
"""Standard config files for this provider (used for automatic discovery)"""
|
54
54
|
return []
|
55
55
|
|
56
|
+
@classmethod
|
57
|
+
def default_concurrency(cls) -> int | None:
|
58
|
+
"""Default max_sandboxes for this provider (`None` means no maximum)"""
|
59
|
+
return None
|
60
|
+
|
56
61
|
@classmethod
|
57
62
|
async def task_init(
|
58
63
|
cls, task_name: str, config: SandboxEnvironmentConfigType | None
|
@@ -143,7 +148,7 @@ class SandboxEnvironment(abc.ABC):
|
|
143
148
|
The current working directory for execution will be the per-sample
|
144
149
|
filesystem context.
|
145
150
|
|
146
|
-
Each output stream (stdout and stderr) is limited to
|
151
|
+
Each output stream (stdout and stderr) is limited to 10 MiB. If exceeded, an
|
147
152
|
`OutputLimitExceededError` will be raised.
|
148
153
|
|
149
154
|
Args:
|
@@ -164,7 +169,7 @@ class SandboxEnvironment(abc.ABC):
|
|
164
169
|
PermissionError: If the user does not have
|
165
170
|
permission to execute the command.
|
166
171
|
OutputLimitExceededError: If an output stream
|
167
|
-
exceeds the
|
172
|
+
exceeds the 10 MiB limit.
|
168
173
|
"""
|
169
174
|
...
|
170
175
|
|
@@ -29,7 +29,7 @@ def verify_exec_result_size(exec_result: ExecResult[str]) -> None:
|
|
29
29
|
"""Verify the size of the output streams in an `ExecResult`.
|
30
30
|
|
31
31
|
Raises:
|
32
|
-
OutputLimitExceededError: If an output stream exceeds the
|
32
|
+
OutputLimitExceededError: If an output stream exceeds the limit.
|
33
33
|
"""
|
34
34
|
limit = SandboxEnvironmentLimits.MAX_EXEC_OUTPUT_SIZE
|
35
35
|
stdout_truncated = truncate_string_to_bytes(exec_result.stdout, limit)
|
@@ -3,7 +3,6 @@ import warnings
|
|
3
3
|
from pathlib import Path
|
4
4
|
from typing import Literal, Union, cast, overload
|
5
5
|
|
6
|
-
import aiofiles
|
7
6
|
from typing_extensions import override
|
8
7
|
|
9
8
|
from .._subprocess import ExecResult, subprocess
|
@@ -85,11 +84,11 @@ class LocalSandboxEnvironment(SandboxEnvironment):
|
|
85
84
|
Path(file).parent.mkdir(parents=True, exist_ok=True)
|
86
85
|
|
87
86
|
if isinstance(contents, str):
|
88
|
-
|
89
|
-
|
87
|
+
with open(file, "w", encoding="utf-8") as f:
|
88
|
+
f.write(contents)
|
90
89
|
else:
|
91
|
-
|
92
|
-
|
90
|
+
with open(file, "wb") as f:
|
91
|
+
f.write(contents)
|
93
92
|
|
94
93
|
@overload
|
95
94
|
async def read_file(self, file: str, text: Literal[True] = True) -> str: ...
|
@@ -102,11 +101,11 @@ class LocalSandboxEnvironment(SandboxEnvironment):
|
|
102
101
|
file = self._resolve_file(file)
|
103
102
|
verify_read_file_size(file)
|
104
103
|
if text:
|
105
|
-
|
106
|
-
return
|
104
|
+
with open(file, "r", encoding="utf-8") as f:
|
105
|
+
return f.read()
|
107
106
|
else:
|
108
|
-
|
109
|
-
return
|
107
|
+
with open(file, "rb") as f:
|
108
|
+
return f.read()
|
110
109
|
|
111
110
|
def _resolve_file(self, file: str) -> str:
|
112
111
|
path = Path(file)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: inspect_ai
|
3
|
-
Version: 0.3.
|
3
|
+
Version: 0.3.53
|
4
4
|
Summary: Framework for large language model evaluations
|
5
5
|
Author: UK AI Safety Institute
|
6
6
|
License: MIT License
|
@@ -20,7 +20,6 @@ Classifier: Operating System :: OS Independent
|
|
20
20
|
Requires-Python: >=3.10
|
21
21
|
Description-Content-Type: text/markdown
|
22
22
|
License-File: LICENSE
|
23
|
-
Requires-Dist: aiofiles
|
24
23
|
Requires-Dist: aiohttp>=3.9.0
|
25
24
|
Requires-Dist: anyio>=4.4.0
|
26
25
|
Requires-Dist: beautifulsoup4
|
@@ -71,7 +70,6 @@ Requires-Dist: pytest-xdist; extra == "dev"
|
|
71
70
|
Requires-Dist: ruff==0.8.3; extra == "dev"
|
72
71
|
Requires-Dist: textual-dev>=0.86.2; extra == "dev"
|
73
72
|
Requires-Dist: types-PyYAML; extra == "dev"
|
74
|
-
Requires-Dist: types-aiofiles; extra == "dev"
|
75
73
|
Requires-Dist: types-beautifulsoup4; extra == "dev"
|
76
74
|
Requires-Dist: types-aioboto3; extra == "dev"
|
77
75
|
Requires-Dist: types-boto3; extra == "dev"
|