inspect-ai 0.3.70__py3-none-any.whl → 0.3.71__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +14 -8
- inspect_ai/_display/core/display.py +2 -0
- inspect_ai/_display/core/footer.py +13 -3
- inspect_ai/_display/plain/display.py +6 -2
- inspect_ai/_display/rich/display.py +19 -6
- inspect_ai/_display/textual/app.py +6 -1
- inspect_ai/_display/textual/display.py +4 -0
- inspect_ai/_display/textual/widgets/transcript.py +10 -6
- inspect_ai/_eval/task/run.py +5 -8
- inspect_ai/_util/content.py +20 -1
- inspect_ai/_util/transcript.py +10 -4
- inspect_ai/_util/working.py +4 -0
- inspect_ai/_view/www/App.css +6 -0
- inspect_ai/_view/www/dist/assets/index.css +115 -87
- inspect_ai/_view/www/dist/assets/index.js +5324 -2276
- inspect_ai/_view/www/eslint.config.mjs +24 -1
- inspect_ai/_view/www/log-schema.json +283 -20
- inspect_ai/_view/www/package.json +8 -3
- inspect_ai/_view/www/src/App.tsx +2 -2
- inspect_ai/_view/www/src/components/AnsiDisplay.tsx +4 -3
- inspect_ai/_view/www/src/components/Card.tsx +9 -8
- inspect_ai/_view/www/src/components/DownloadButton.tsx +2 -1
- inspect_ai/_view/www/src/components/EmptyPanel.tsx +2 -2
- inspect_ai/_view/www/src/components/ErrorPanel.tsx +4 -3
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +13 -5
- inspect_ai/_view/www/src/components/FindBand.tsx +3 -3
- inspect_ai/_view/www/src/components/HumanBaselineView.tsx +3 -3
- inspect_ai/_view/www/src/components/LabeledValue.tsx +5 -4
- inspect_ai/_view/www/src/components/LargeModal.tsx +18 -13
- inspect_ai/_view/www/src/components/{LightboxCarousel.css → LightboxCarousel.module.css} +22 -18
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +36 -27
- inspect_ai/_view/www/src/components/MessageBand.tsx +2 -1
- inspect_ai/_view/www/src/components/NavPills.tsx +9 -8
- inspect_ai/_view/www/src/components/ProgressBar.tsx +2 -1
- inspect_ai/_view/www/src/components/TabSet.tsx +21 -15
- inspect_ai/_view/www/src/index.tsx +2 -2
- inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +11 -9
- inspect_ai/_view/www/src/metadata/MetaDataView.tsx +3 -2
- inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +1 -0
- inspect_ai/_view/www/src/metadata/RenderedContent.tsx +16 -0
- inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +3 -2
- inspect_ai/_view/www/src/plan/DetailStep.tsx +2 -1
- inspect_ai/_view/www/src/plan/PlanCard.tsx +2 -5
- inspect_ai/_view/www/src/plan/PlanDetailView.tsx +6 -9
- inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +2 -1
- inspect_ai/_view/www/src/plan/SolverDetailView.tsx +3 -3
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +2 -2
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +3 -3
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +2 -2
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +2 -2
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +3 -19
- inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatView.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +22 -7
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +35 -6
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +2 -2
- inspect_ai/_view/www/src/samples/chat/messages.ts +15 -2
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +13 -4
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +2 -2
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +18 -19
- inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +1 -1
- inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +4 -3
- inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +2 -2
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +2 -3
- inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +3 -2
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +2 -1
- inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +2 -1
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +57 -45
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +2 -1
- inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +2 -1
- inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +2 -2
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +4 -3
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +2 -5
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +2 -2
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +2 -1
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +2 -2
- inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +4 -0
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +12 -2
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +1 -1
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +25 -28
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +5 -4
- inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +2 -2
- inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +8 -7
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +2 -2
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +3 -3
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +18 -14
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +5 -5
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +34 -15
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +3 -2
- inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +2 -2
- inspect_ai/_view/www/src/samples/transcript/event/EventTimingPanel.module.css +28 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventTimingPanel.tsx +115 -0
- inspect_ai/_view/www/src/samples/transcript/event/utils.ts +29 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +3 -3
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +11 -8
- inspect_ai/_view/www/src/types/log.d.ts +129 -34
- inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +6 -10
- inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +4 -0
- inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +32 -9
- inspect_ai/_view/www/src/usage/TokenTable.tsx +4 -6
- inspect_ai/_view/www/src/usage/UsageCard.tsx +2 -1
- inspect_ai/_view/www/src/utils/format.ts +1 -1
- inspect_ai/_view/www/src/utils/json.ts +24 -0
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +6 -5
- inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +9 -2
- inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +2 -1
- inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +2 -1
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +3 -3
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +4 -3
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +5 -4
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +5 -8
- inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +5 -4
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +2 -1
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +2 -1
- inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +2 -2
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +2 -1
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +2 -2
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +2 -2
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +2 -5
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +12 -11
- inspect_ai/_view/www/yarn.lock +241 -5
- inspect_ai/log/_condense.py +3 -0
- inspect_ai/log/_recorders/eval.py +6 -1
- inspect_ai/log/_transcript.py +58 -1
- inspect_ai/model/__init__.py +2 -0
- inspect_ai/model/_call_tools.py +7 -0
- inspect_ai/model/_chat_message.py +22 -7
- inspect_ai/model/_conversation.py +10 -8
- inspect_ai/model/_generate_config.py +25 -4
- inspect_ai/model/_model.py +133 -57
- inspect_ai/model/_model_output.py +3 -0
- inspect_ai/model/_openai.py +106 -40
- inspect_ai/model/_providers/anthropic.py +134 -26
- inspect_ai/model/_providers/google.py +27 -8
- inspect_ai/model/_providers/groq.py +9 -4
- inspect_ai/model/_providers/openai.py +57 -4
- inspect_ai/model/_providers/openai_o1.py +10 -0
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/model/_reasoning.py +15 -2
- inspect_ai/scorer/_model.py +23 -19
- inspect_ai/solver/_human_agent/agent.py +14 -10
- inspect_ai/solver/_human_agent/commands/__init__.py +7 -3
- inspect_ai/solver/_human_agent/commands/submit.py +76 -30
- inspect_ai/tool/__init__.py +2 -0
- inspect_ai/tool/_tool.py +3 -1
- inspect_ai/tool/_tools/_computer/_resources/tool/_run.py +1 -1
- inspect_ai/tool/_tools/_web_browser/_resources/.pylintrc +8 -0
- inspect_ai/tool/_tools/_web_browser/_resources/.vscode/launch.json +24 -0
- inspect_ai/tool/_tools/_web_browser/_resources/.vscode/settings.json +25 -0
- inspect_ai/tool/_tools/_web_browser/_resources/Dockerfile +5 -6
- inspect_ai/tool/_tools/_web_browser/_resources/README.md +10 -11
- inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree.py +71 -0
- inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree_node.py +323 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/__init__.py +5 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/a11y.py +279 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom.py +9 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom_snapshot.py +293 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/page.py +94 -0
- inspect_ai/tool/_tools/_web_browser/_resources/constants.py +2 -0
- inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.svg +2 -0
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_browser.py +50 -0
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_crawler.py +31 -359
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_page_crawler.py +280 -0
- inspect_ai/tool/_tools/_web_browser/_resources/pyproject.toml +65 -0
- inspect_ai/tool/_tools/_web_browser/_resources/rectangle.py +64 -0
- inspect_ai/tool/_tools/_web_browser/_resources/rpc_client_helpers.py +146 -0
- inspect_ai/tool/_tools/_web_browser/_resources/scale_factor.py +64 -0
- inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_tree_node.py +180 -0
- inspect_ai/tool/_tools/_web_browser/_resources/test_playwright_crawler.py +15 -9
- inspect_ai/tool/_tools/_web_browser/_resources/test_rectangle.py +15 -0
- inspect_ai/tool/_tools/_web_browser/_resources/test_web_client.py +44 -0
- inspect_ai/tool/_tools/_web_browser/_resources/web_browser_rpc_types.py +39 -0
- inspect_ai/tool/_tools/_web_browser/_resources/web_client.py +198 -48
- inspect_ai/tool/_tools/_web_browser/_resources/web_client_new_session.py +26 -25
- inspect_ai/tool/_tools/_web_browser/_resources/web_server.py +178 -39
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +38 -19
- inspect_ai/util/__init__.py +2 -1
- inspect_ai/util/_display.py +12 -0
- inspect_ai/util/_sandbox/events.py +55 -21
- inspect_ai/util/_sandbox/self_check.py +131 -43
- inspect_ai/util/_subtask.py +11 -0
- {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.71.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.71.dist-info}/RECORD +197 -182
- {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.71.dist-info}/WHEEL +1 -1
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +0 -149
- inspect_ai/_view/www/node_modules/flatted/python/test.py +0 -63
- inspect_ai/_view/www/src/components/VirtualList.module.css +0 -19
- inspect_ai/_view/www/src/components/VirtualList.tsx +0 -292
- inspect_ai/tool/_tools/_web_browser/_resources/accessibility_node.py +0 -312
- inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py +0 -275
- inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.png +0 -0
- inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_node.py +0 -176
- inspect_ai/tool/_tools/_web_browser/_resources/test_dm_env_servicer.py +0 -135
- inspect_ai/tool/_tools/_web_browser/_resources/test_web_environment.py +0 -71
- inspect_ai/tool/_tools/_web_browser/_resources/web_environment.py +0 -184
- {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.71.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.71.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.71.dist-info}/top_level.txt +0 -0
@@ -9,6 +9,14 @@ from inspect_ai.util import (
|
|
9
9
|
SandboxEnvironmentLimits,
|
10
10
|
)
|
11
11
|
|
12
|
+
# If you're wondering these tests are not using pytest fixtures,
|
13
|
+
# see the discussion https://github.com/UKGovernmentBEIS/inspect_ai/pull/347
|
14
|
+
# It's not ideal, so a PR to fix this would be welcome.
|
15
|
+
#
|
16
|
+
# If you are struggling to debug a failing one of these, two tips:
|
17
|
+
# 1. Comment out everything apart from the failing test in the list in the `self_check` function
|
18
|
+
# 2. Get rid of the try/catch in check_test_fn (the body can just be `await fn(sandbox_env); return True`
|
19
|
+
|
12
20
|
|
13
21
|
async def check_test_fn(
|
14
22
|
fn: Callable[[SandboxEnvironment], Coroutine[Any, Any, None]],
|
@@ -20,7 +28,7 @@ async def check_test_fn(
|
|
20
28
|
except AssertionError as e:
|
21
29
|
return f"FAILED: [{str(e)}]"
|
22
30
|
except Exception as e:
|
23
|
-
return f"ERROR: {repr(e)}"
|
31
|
+
return f"ERROR: [{repr(e)}]"
|
24
32
|
|
25
33
|
|
26
34
|
async def self_check(sandbox_env: SandboxEnvironment) -> dict[str, bool | str]:
|
@@ -92,8 +100,12 @@ async def test_write_file_text_utf(sandbox_env: SandboxEnvironment) -> None:
|
|
92
100
|
file_name = "test_write_file_text_utf.file"
|
93
101
|
await sandbox_env.write_file(file_name, utf_content)
|
94
102
|
file_with_utf_content = await sandbox_env.read_file(file_name, text=True)
|
95
|
-
assert isinstance(file_with_utf_content, str)
|
96
|
-
|
103
|
+
assert isinstance(file_with_utf_content, str), (
|
104
|
+
f"Expected file content to be a string, got {type(file_with_utf_content)}"
|
105
|
+
)
|
106
|
+
assert file_with_utf_content == utf_content, (
|
107
|
+
f"UTF-8 content should match, got {file_with_utf_content=}; expected {utf_content=}"
|
108
|
+
)
|
97
109
|
await _cleanup_file(sandbox_env, file_name)
|
98
110
|
|
99
111
|
|
@@ -104,7 +116,7 @@ async def test_read_and_write_file_binary(sandbox_env: SandboxEnvironment) -> No
|
|
104
116
|
) # invalid UTF-8 from https://stackoverflow.com/a/17199164/116509
|
105
117
|
|
106
118
|
written_file_bytes = await sandbox_env.read_file(file_name, text=False)
|
107
|
-
assert b"\xc3\x28" == written_file_bytes
|
119
|
+
assert b"\xc3\x28" == written_file_bytes, "Binary content should match"
|
108
120
|
await _cleanup_file(sandbox_env, file_name)
|
109
121
|
|
110
122
|
|
@@ -115,7 +127,7 @@ async def test_read_and_write_large_file_binary(
|
|
115
127
|
long_bytes = b"\xc3" * 5_000_000
|
116
128
|
await sandbox_env.write_file(file_name, long_bytes)
|
117
129
|
written_file_bytes = await sandbox_env.read_file(file_name, text=False)
|
118
|
-
assert long_bytes == written_file_bytes
|
130
|
+
assert long_bytes == written_file_bytes, "Large binary content should match"
|
119
131
|
await _cleanup_file(sandbox_env, file_name)
|
120
132
|
|
121
133
|
|
@@ -125,7 +137,9 @@ async def test_read_and_write_file_including_directory_absolute(
|
|
125
137
|
file_name = "/tmp/test_rw_including_directory_absolute/test.file"
|
126
138
|
await sandbox_env.write_file(file_name, "absolutely enjoying being in a directory")
|
127
139
|
written_file_string = await sandbox_env.read_file(file_name, text=True)
|
128
|
-
assert "absolutely enjoying being in a directory" == written_file_string
|
140
|
+
assert "absolutely enjoying being in a directory" == written_file_string, (
|
141
|
+
f"Absolute directory content should match, got {written_file_string=}"
|
142
|
+
)
|
129
143
|
await _cleanup_file(sandbox_env, file_name)
|
130
144
|
await sandbox_env.exec(["rmdir", "/tmp/test_rw_including_directory_absolute"])
|
131
145
|
|
@@ -136,7 +150,9 @@ async def test_read_and_write_file_including_directory_relative(
|
|
136
150
|
file_name = "test_rw_including_directory_relative/test.file"
|
137
151
|
await sandbox_env.write_file(file_name, "relatively enjoying being in a directory")
|
138
152
|
written_file_string = await sandbox_env.read_file(file_name, text=True)
|
139
|
-
assert "relatively enjoying being in a directory" == written_file_string
|
153
|
+
assert "relatively enjoying being in a directory" == written_file_string, (
|
154
|
+
f"Relative directory content should match, got {written_file_string=}"
|
155
|
+
)
|
140
156
|
await _cleanup_file(sandbox_env, file_name)
|
141
157
|
await sandbox_env.exec(["rmdir", "test_rw_including_directory_relative"])
|
142
158
|
|
@@ -145,8 +161,12 @@ async def test_read_file_zero_length(sandbox_env: SandboxEnvironment) -> None:
|
|
145
161
|
file_name = "zero_length_file.file"
|
146
162
|
await sandbox_env.exec(["touch", file_name])
|
147
163
|
zero_length = await sandbox_env.read_file(file_name, text=True)
|
148
|
-
assert isinstance(zero_length, str)
|
149
|
-
|
164
|
+
assert isinstance(zero_length, str), (
|
165
|
+
f"Zero-length file should return a string, got {type(zero_length)}"
|
166
|
+
)
|
167
|
+
assert zero_length == "", (
|
168
|
+
f"Zero-length file should be an empty string, got {zero_length=}"
|
169
|
+
)
|
150
170
|
await _cleanup_file(sandbox_env, file_name)
|
151
171
|
|
152
172
|
|
@@ -154,7 +174,10 @@ async def test_read_file_not_found(sandbox_env: SandboxEnvironment) -> None:
|
|
154
174
|
file_name = "nonexistent"
|
155
175
|
with Raises(FileNotFoundError) as e_info:
|
156
176
|
await sandbox_env.read_file(file_name, text=True)
|
157
|
-
assert
|
177
|
+
assert e_info is not None, "FileNotFoundError should be raised"
|
178
|
+
assert file_name in str(e_info.value), (
|
179
|
+
f"FileNotFoundError should contain the filename, got {e_info.value=}"
|
180
|
+
)
|
158
181
|
|
159
182
|
|
160
183
|
async def test_read_file_not_allowed(sandbox_env: SandboxEnvironment) -> None:
|
@@ -163,7 +186,10 @@ async def test_read_file_not_allowed(sandbox_env: SandboxEnvironment) -> None:
|
|
163
186
|
await sandbox_env.exec(["chmod", "-r", file_name])
|
164
187
|
with Raises(PermissionError) as e_info:
|
165
188
|
await sandbox_env.read_file(file_name, text=True)
|
166
|
-
assert
|
189
|
+
assert e_info is not None, "PermissionError should be raised"
|
190
|
+
assert file_name in str(e_info.value), (
|
191
|
+
f"PermissionError should contain the filename, got {e_info.value=}"
|
192
|
+
)
|
167
193
|
await sandbox_env.exec(["chmod", "+r", file_name])
|
168
194
|
await _cleanup_file(sandbox_env, file_name)
|
169
195
|
|
@@ -172,7 +198,10 @@ async def test_read_file_is_directory(sandbox_env: SandboxEnvironment) -> None:
|
|
172
198
|
file_name = "/etc"
|
173
199
|
with Raises(IsADirectoryError) as e_info:
|
174
200
|
await sandbox_env.read_file(file_name, text=True)
|
175
|
-
|
201
|
+
assert e_info is not None, "IsADirectoryError should be raised"
|
202
|
+
assert "directory" in str(e_info.value), (
|
203
|
+
f"IsADirectoryError should mention 'directory', got {e_info.value=}"
|
204
|
+
)
|
176
205
|
|
177
206
|
|
178
207
|
async def test_read_file_nonsense_name(
|
@@ -181,7 +210,10 @@ async def test_read_file_nonsense_name(
|
|
181
210
|
file_name = "https:/en.wikipedia.org/wiki/Bart%C5%82omiej_Kasprzykowski"
|
182
211
|
with Raises(FileNotFoundError) as e_info:
|
183
212
|
await sandbox_env.read_file(file_name, text=True)
|
184
|
-
assert "
|
213
|
+
assert e_info is not None, "FileNotFoundError should be raised"
|
214
|
+
assert "wikipedia" in str(e_info.value), (
|
215
|
+
f"FileNotFoundError should contain the filename, got {e_info.value=}"
|
216
|
+
)
|
185
217
|
|
186
218
|
|
187
219
|
async def test_read_file_limit(sandbox_env: SandboxEnvironment) -> None:
|
@@ -191,7 +223,10 @@ async def test_read_file_limit(sandbox_env: SandboxEnvironment) -> None:
|
|
191
223
|
with mock.patch.object(SandboxEnvironmentLimits, "MAX_READ_FILE_SIZE", 1024):
|
192
224
|
with Raises(OutputLimitExceededError) as e_info:
|
193
225
|
await sandbox_env.read_file(file_name, text=True)
|
194
|
-
|
226
|
+
assert e_info is not None, "OutputLimitExceededError should be raised"
|
227
|
+
assert "limit of 100 MiB was exceeded" in str(e_info.value), (
|
228
|
+
f"OutputLimitExceededError should mention the limit, got {e_info.value=}"
|
229
|
+
)
|
195
230
|
await _cleanup_file(sandbox_env, file_name)
|
196
231
|
|
197
232
|
|
@@ -199,8 +234,12 @@ async def test_write_text_file_zero_length(sandbox_env: SandboxEnvironment) -> N
|
|
199
234
|
file_name = "zero_length_file.file"
|
200
235
|
await sandbox_env.write_file(file_name, "")
|
201
236
|
zero_length = await sandbox_env.read_file(file_name, text=True)
|
202
|
-
assert isinstance(zero_length, str)
|
203
|
-
|
237
|
+
assert isinstance(zero_length, str), (
|
238
|
+
f"Zero-length file should return a string, got {type(zero_length)}"
|
239
|
+
)
|
240
|
+
assert zero_length == "", (
|
241
|
+
f"Zero-length file should be an empty string, got {zero_length=}"
|
242
|
+
)
|
204
243
|
await _cleanup_file(sandbox_env, file_name)
|
205
244
|
|
206
245
|
|
@@ -209,8 +248,12 @@ async def test_write_text_file_space(sandbox_env: SandboxEnvironment) -> None:
|
|
209
248
|
file_name = "file with space.file"
|
210
249
|
await sandbox_env.write_file(file_name, space)
|
211
250
|
file_with_space = await sandbox_env.read_file(file_name, text=True)
|
212
|
-
assert isinstance(file_with_space, str)
|
213
|
-
|
251
|
+
assert isinstance(file_with_space, str), (
|
252
|
+
f"File with space should return a string, got {type(file_with_space)}"
|
253
|
+
)
|
254
|
+
assert file_with_space == space, (
|
255
|
+
f"File with space content should match, got {file_with_space=}; expected {space=}"
|
256
|
+
)
|
214
257
|
await _cleanup_file(sandbox_env, file_name)
|
215
258
|
|
216
259
|
|
@@ -226,7 +269,10 @@ async def test_write_text_file_is_directory(
|
|
226
269
|
"/tmp/inspect_ai_test_write_text_file_is_directory",
|
227
270
|
"content cannot go in a directory, dummy",
|
228
271
|
)
|
229
|
-
assert "
|
272
|
+
assert e_info is not None, "IsADirectoryError should be raised"
|
273
|
+
assert "directory" in str(e_info.value), (
|
274
|
+
f"IsADirectoryError should mention 'directory', got {e_info.value=}"
|
275
|
+
)
|
230
276
|
await sandbox_env.exec(
|
231
277
|
["rm", "-rf", "/tmp/inspect_ai_test_write_text_file_is_directory"]
|
232
278
|
)
|
@@ -240,7 +286,10 @@ async def test_write_text_file_without_permissions(
|
|
240
286
|
await sandbox_env.exec(["chmod", "-w", file_name])
|
241
287
|
with Raises(PermissionError) as e_info:
|
242
288
|
await sandbox_env.write_file(file_name, "this won't stick")
|
243
|
-
assert
|
289
|
+
assert e_info is not None, "PermissionError should be raised"
|
290
|
+
assert file_name in str(e_info.value), (
|
291
|
+
f"PermissionError should contain the filename, got {e_info.value=}"
|
292
|
+
)
|
244
293
|
await sandbox_env.exec(["chmod", "+w", file_name])
|
245
294
|
await _cleanup_file(sandbox_env, file_name)
|
246
295
|
|
@@ -252,7 +301,9 @@ async def test_write_text_file_exists(
|
|
252
301
|
await sandbox_env.write_file(file_name, "mundane content")
|
253
302
|
await sandbox_env.write_file(file_name, "altered content")
|
254
303
|
altered_content = await sandbox_env.read_file(file_name, text=True)
|
255
|
-
assert altered_content == "altered content"
|
304
|
+
assert altered_content == "altered content", (
|
305
|
+
f"Existing file content should be overwritten, got {altered_content=}"
|
306
|
+
)
|
256
307
|
await _cleanup_file(sandbox_env, file_name)
|
257
308
|
|
258
309
|
|
@@ -260,8 +311,12 @@ async def test_write_binary_file_zero_length(sandbox_env: SandboxEnvironment) ->
|
|
260
311
|
file_name = "zero_length_file.file"
|
261
312
|
await sandbox_env.write_file(file_name, b"")
|
262
313
|
zero_length = await sandbox_env.read_file(file_name, text=False)
|
263
|
-
assert isinstance(zero_length, bytes)
|
264
|
-
|
314
|
+
assert isinstance(zero_length, bytes), (
|
315
|
+
f"Zero-length file should return bytes, got {type(zero_length)}"
|
316
|
+
)
|
317
|
+
assert zero_length == b"", (
|
318
|
+
f"Zero-length file should be empty bytes, got {zero_length=}"
|
319
|
+
)
|
265
320
|
await _cleanup_file(sandbox_env, file_name)
|
266
321
|
|
267
322
|
|
@@ -270,8 +325,10 @@ async def test_write_binary_file_space(sandbox_env: SandboxEnvironment) -> None:
|
|
270
325
|
file_name = "file with space.file"
|
271
326
|
await sandbox_env.write_file(file_name, binary_content)
|
272
327
|
file_with_space = await sandbox_env.read_file(file_name, text=False)
|
273
|
-
assert isinstance(file_with_space, bytes)
|
274
|
-
|
328
|
+
assert isinstance(file_with_space, bytes), (
|
329
|
+
f"File with space should return bytes, got {type(file_with_space)}"
|
330
|
+
)
|
331
|
+
assert file_with_space == binary_content, "File with space content should match"
|
275
332
|
await _cleanup_file(sandbox_env, file_name)
|
276
333
|
|
277
334
|
|
@@ -287,7 +344,10 @@ async def test_write_binary_file_is_directory(
|
|
287
344
|
"/tmp/inspect_ai_test_write_binary_file_is_directory",
|
288
345
|
b"\xc3\x28",
|
289
346
|
)
|
290
|
-
assert "
|
347
|
+
assert e_info is not None, "IsADirectoryError should be raised"
|
348
|
+
assert "directory" in str(e_info.value), (
|
349
|
+
f"IsADirectoryError should mention 'directory', got {e_info.value=}"
|
350
|
+
)
|
291
351
|
await sandbox_env.exec(
|
292
352
|
["rm", "-rf", "/tmp/inspect_ai_test_write_binary_file_is_directory"]
|
293
353
|
)
|
@@ -301,7 +361,10 @@ async def test_write_binary_file_without_permissions(
|
|
301
361
|
await sandbox_env.exec(["chmod", "-w", file_name])
|
302
362
|
with Raises(PermissionError) as e_info:
|
303
363
|
await sandbox_env.write_file(file_name, b"\xc3\x28")
|
304
|
-
assert
|
364
|
+
assert e_info is not None, "PermissionError should be raised"
|
365
|
+
assert file_name in str(e_info.value), (
|
366
|
+
f"PermissionError should contain the filename, got {e_info.value=}"
|
367
|
+
)
|
305
368
|
await sandbox_env.exec(["chmod", "+w", file_name])
|
306
369
|
await _cleanup_file(sandbox_env, file_name)
|
307
370
|
|
@@ -313,7 +376,7 @@ async def test_write_binary_file_exists(
|
|
313
376
|
await sandbox_env.write_file(file_name, b"\xc3\x28")
|
314
377
|
await sandbox_env.write_file(file_name, b"\xc3\x29")
|
315
378
|
altered_content = await sandbox_env.read_file(file_name, text=False)
|
316
|
-
assert altered_content == b"\xc3\x29"
|
379
|
+
assert altered_content == b"\xc3\x29", "Existing file content should be overwritten"
|
317
380
|
await _cleanup_file(sandbox_env, file_name)
|
318
381
|
|
319
382
|
|
@@ -328,12 +391,16 @@ async def test_exec_output(sandbox_env: SandboxEnvironment) -> None:
|
|
328
391
|
|
329
392
|
async def test_exec_stderr(sandbox_env: SandboxEnvironment) -> None:
|
330
393
|
exec_result = await sandbox_env.exec(["sh", "-c", "echo boof; echo baz >&2"])
|
331
|
-
assert exec_result.stderr == "baz\n"
|
394
|
+
assert exec_result.stderr == "baz\n", (
|
395
|
+
f"stderr output should match; got {exec_result.stderr=}, expected 'baz\n'"
|
396
|
+
)
|
332
397
|
|
333
398
|
|
334
399
|
async def test_exec_returncode(sandbox_env: SandboxEnvironment) -> None:
|
335
400
|
exec_result = await sandbox_env.exec(["sh", "-c", "echo foo; exit 70"])
|
336
|
-
assert exec_result.returncode == 70
|
401
|
+
assert exec_result.returncode == 70, (
|
402
|
+
f"Return code should match, got {exec_result.returncode=}, expected 70"
|
403
|
+
)
|
337
404
|
|
338
405
|
|
339
406
|
async def test_exec_timeout(sandbox_env: SandboxEnvironment) -> None:
|
@@ -391,13 +458,13 @@ async def test_exec_as_user(sandbox_env: SandboxEnvironment) -> None:
|
|
391
458
|
|
392
459
|
|
393
460
|
async def test_exec_as_nonexistent_user(sandbox_env: SandboxEnvironment) -> None:
|
394
|
-
|
461
|
+
nonexistent_username = "nonexistent"
|
462
|
+
result = await sandbox_env.exec(["whoami"], user=nonexistent_username)
|
395
463
|
assert not result.success, "Command should have failed for nonexistent user"
|
396
|
-
|
397
|
-
|
398
|
-
)
|
399
|
-
|
400
|
-
f"Error string '{expected_error}' not found in error output: '{result.stdout}'"
|
464
|
+
assert (
|
465
|
+
nonexistent_username in result.stdout or nonexistent_username in result.stderr
|
466
|
+
), (
|
467
|
+
f"Error not found in command output: '{result.stdout}' nor stderr '{result.stderr}"
|
401
468
|
)
|
402
469
|
|
403
470
|
|
@@ -405,13 +472,17 @@ async def test_cwd_unspecified(sandbox_env: SandboxEnvironment) -> None:
|
|
405
472
|
file_name = "test_cwd_unspecified.file"
|
406
473
|
await sandbox_env.write_file(file_name, "ls me plz")
|
407
474
|
current_dir_contents = (await sandbox_env.exec(["ls", "-1"])).stdout
|
408
|
-
assert file_name in current_dir_contents
|
475
|
+
assert file_name in current_dir_contents, (
|
476
|
+
f"File should be in current directory contents; got {current_dir_contents=}"
|
477
|
+
)
|
409
478
|
await _cleanup_file(sandbox_env, file_name)
|
410
479
|
|
411
480
|
|
412
481
|
async def test_cwd_custom(sandbox_env: SandboxEnvironment) -> None:
|
413
482
|
current_dir_contents = (await sandbox_env.exec(["ls"], cwd="/usr/bin")).stdout
|
414
|
-
assert "env" in current_dir_contents
|
483
|
+
assert "env" in current_dir_contents, (
|
484
|
+
f"env should be in /usr/bin; got {current_dir_contents=}"
|
485
|
+
)
|
415
486
|
|
416
487
|
|
417
488
|
async def test_cwd_relative(sandbox_env: SandboxEnvironment) -> None:
|
@@ -433,7 +504,9 @@ async def test_cwd_absolute(sandbox_env: SandboxEnvironment) -> None:
|
|
433
504
|
file_name = "/tmp/test_cwd_absolute/test_cwd_absolute.file"
|
434
505
|
await sandbox_env.write_file(file_name, "ls me plz")
|
435
506
|
current_dir_contents = (await sandbox_env.exec(["ls"], cwd=cwd_directory)).stdout
|
436
|
-
assert "test_cwd_absolute.file" in current_dir_contents
|
507
|
+
assert "test_cwd_absolute.file" in current_dir_contents, (
|
508
|
+
f"File should be in current directory contents, got {current_dir_contents=}"
|
509
|
+
)
|
437
510
|
await _cleanup_file(sandbox_env, file_name)
|
438
511
|
await sandbox_env.exec(["rmdir", cwd_directory])
|
439
512
|
|
@@ -442,20 +515,35 @@ async def test_exec_stdout_is_limited(sandbox_env: SandboxEnvironment) -> None:
|
|
442
515
|
output_size = 10 * 1024**2 + 1024 # 10 MiB + 1 KiB
|
443
516
|
with pytest.raises(OutputLimitExceededError) as e_info:
|
444
517
|
await sandbox_env.exec(["sh", "-c", f"yes | head -c {output_size}"])
|
445
|
-
assert
|
518
|
+
assert e_info is not None, "OutputLimitExceededError should be raised"
|
519
|
+
assert "limit of 10 MiB was exceeded" in str(e_info.value), (
|
520
|
+
"OutputLimitExceededError should mention the limit; got {e_info.value=}"
|
521
|
+
)
|
446
522
|
truncated_output = e_info.value.truncated_output
|
447
523
|
# `yes` outputs 'y\n' (ASCII) so the size equals the string length.
|
448
524
|
# some shells additionally output 'canceled\n' so we add fudge factor for that
|
449
|
-
assert truncated_output and (len(truncated_output) - 10 * 1024**2) < 10
|
525
|
+
assert truncated_output and (len(truncated_output) - 10 * 1024**2) < 10, (
|
526
|
+
f"output not truncated or wrong length; start of truncated output = {'' if not truncated_output else truncated_output[:10]}; len(truncated_output): {'n/a' if not truncated_output else len(truncated_output)}"
|
527
|
+
)
|
450
528
|
|
451
529
|
|
452
530
|
async def test_exec_stderr_is_limited(sandbox_env: SandboxEnvironment) -> None:
|
453
531
|
output_size = 10 * 1024**2 + 1024 # 10 MiB + 1 KiB
|
454
532
|
with pytest.raises(OutputLimitExceededError) as e_info:
|
455
533
|
await sandbox_env.exec(["sh", "-c", f"yes | head -c {output_size} 1>&2"])
|
456
|
-
assert
|
534
|
+
assert e_info is not None, "OutputLimitExceededError should be raised"
|
535
|
+
assert "limit of 10 MiB was exceeded" in str(e_info.value), (
|
536
|
+
"OutputLimitExceededError should mention the limit; got {e_info.value=}"
|
537
|
+
)
|
457
538
|
truncated_output = e_info.value.truncated_output
|
458
|
-
assert
|
539
|
+
assert (
|
540
|
+
truncated_output
|
541
|
+
and truncated_output[0] == "y"
|
542
|
+
and len(truncated_output) <= 10 * 1024**2
|
543
|
+
and len(truncated_output) > 0
|
544
|
+
), (
|
545
|
+
f"output not truncated or wrong length; start of truncated output = {'' if not truncated_output else truncated_output[:10]}; len(truncated_output): {'n/a' if not truncated_output else len(truncated_output)}"
|
546
|
+
)
|
459
547
|
|
460
548
|
|
461
549
|
# TODO: write a test for when cwd doesn't exist
|
inspect_ai/util/_subtask.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
import asyncio
|
2
2
|
import inspect
|
3
|
+
from datetime import datetime
|
3
4
|
from functools import wraps
|
4
5
|
from logging import getLogger
|
5
6
|
from typing import (
|
@@ -15,6 +16,7 @@ from typing import (
|
|
15
16
|
from inspect_ai._util._async import is_callable_coroutine
|
16
17
|
from inspect_ai._util.content import Content
|
17
18
|
from inspect_ai._util.trace import trace_action
|
19
|
+
from inspect_ai._util.working import sample_waiting_time
|
18
20
|
from inspect_ai.util._store import Store, dict_jsonable, init_subtask_store
|
19
21
|
|
20
22
|
SubtaskResult = str | int | float | bool | list[Content]
|
@@ -130,6 +132,7 @@ def subtask(
|
|
130
132
|
return result, list(transcript().events)
|
131
133
|
|
132
134
|
# create subtask event
|
135
|
+
waiting_time_start = sample_waiting_time()
|
133
136
|
event = SubtaskEvent(
|
134
137
|
name=subtask_name, input=log_input, type=type, pending=True
|
135
138
|
)
|
@@ -139,6 +142,14 @@ def subtask(
|
|
139
142
|
asyncio_task = asyncio.create_task(run())
|
140
143
|
result, events = await asyncio_task
|
141
144
|
|
145
|
+
# time accounting
|
146
|
+
completed = datetime.now()
|
147
|
+
waiting_time_end = sample_waiting_time()
|
148
|
+
event.completed = completed
|
149
|
+
event.working_time = (completed - event.timestamp).total_seconds() - (
|
150
|
+
waiting_time_end - waiting_time_start
|
151
|
+
)
|
152
|
+
|
142
153
|
# update event
|
143
154
|
event.result = result
|
144
155
|
event.events = events
|