PyPI - inspect-ai - Versions diffs - 0.3.70__py3-none-any.whl → 0.3.71__py3-none-any.whl - Mend

inspect-ai 0.3.70__py3-none-any.whl → 0.3.71__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (208) hide show

inspect_ai/util/_sandbox/self_check.py CHANGED Viewed

@@ -9,6 +9,14 @@ from inspect_ai.util import (
     SandboxEnvironmentLimits,
 )
+# If you're wondering why these tests are not using pytest fixtures,
+# see the discussion https://github.com/UKGovernmentBEIS/inspect_ai/pull/347
+# It's not ideal, so a PR to fix this would be welcome.
+#
+# If you are struggling to debug a failing one of these, two tips:
+# 1. Comment out everything apart from the failing test in the list in the `self_check` function
+# 2. Get rid of the try/except in check_test_fn (the body can just be `await fn(sandbox_env); return True`)
 async def check_test_fn(
     fn: Callable[[SandboxEnvironment], Coroutine[Any, Any, None]],
@@ -20,7 +28,7 @@ async def check_test_fn(
     except AssertionError as e:
         return f"FAILED: [{str(e)}]"
     except Exception as e:
-        return f"ERROR: {repr(e)}"
+        return f"ERROR: [{repr(e)}]"
 async def self_check(sandbox_env: SandboxEnvironment) -> dict[str, bool | str]:
@@ -92,8 +100,12 @@ async def test_write_file_text_utf(sandbox_env: SandboxEnvironment) -> None:
     file_name = "test_write_file_text_utf.file"
     await sandbox_env.write_file(file_name, utf_content)
     file_with_utf_content = await sandbox_env.read_file(file_name, text=True)
-    assert isinstance(file_with_utf_content, str)
-    assert file_with_utf_content == utf_content
+    assert isinstance(file_with_utf_content, str), (
+        f"Expected file content to be a string, got {type(file_with_utf_content)}"
+    )
+    assert file_with_utf_content == utf_content, (
+        f"UTF-8 content should match, got {file_with_utf_content=}; expected {utf_content=}"
+    )
     await _cleanup_file(sandbox_env, file_name)
@@ -104,7 +116,7 @@ async def test_read_and_write_file_binary(sandbox_env: SandboxEnvironment) -> No
     )  # invalid UTF-8 from https://stackoverflow.com/a/17199164/116509
     written_file_bytes = await sandbox_env.read_file(file_name, text=False)
-    assert b"\xc3\x28" == written_file_bytes
+    assert b"\xc3\x28" == written_file_bytes, "Binary content should match"
     await _cleanup_file(sandbox_env, file_name)
@@ -115,7 +127,7 @@ async def test_read_and_write_large_file_binary(
     long_bytes = b"\xc3" * 5_000_000
     await sandbox_env.write_file(file_name, long_bytes)
     written_file_bytes = await sandbox_env.read_file(file_name, text=False)
-    assert long_bytes == written_file_bytes
+    assert long_bytes == written_file_bytes, "Large binary content should match"
     await _cleanup_file(sandbox_env, file_name)
@@ -125,7 +137,9 @@ async def test_read_and_write_file_including_directory_absolute(
     file_name = "/tmp/test_rw_including_directory_absolute/test.file"
     await sandbox_env.write_file(file_name, "absolutely enjoying being in a directory")
     written_file_string = await sandbox_env.read_file(file_name, text=True)
-    assert "absolutely enjoying being in a directory" == written_file_string
+    assert "absolutely enjoying being in a directory" == written_file_string, (
+        f"Absolute directory content should match, got {written_file_string=}"
+    )
     await _cleanup_file(sandbox_env, file_name)
     await sandbox_env.exec(["rmdir", "/tmp/test_rw_including_directory_absolute"])
@@ -136,7 +150,9 @@ async def test_read_and_write_file_including_directory_relative(
     file_name = "test_rw_including_directory_relative/test.file"
     await sandbox_env.write_file(file_name, "relatively enjoying being in a directory")
     written_file_string = await sandbox_env.read_file(file_name, text=True)
-    assert "relatively enjoying being in a directory" == written_file_string
+    assert "relatively enjoying being in a directory" == written_file_string, (
+        f"Relative directory content should match, got {written_file_string=}"
+    )
     await _cleanup_file(sandbox_env, file_name)
     await sandbox_env.exec(["rmdir", "test_rw_including_directory_relative"])
@@ -145,8 +161,12 @@ async def test_read_file_zero_length(sandbox_env: SandboxEnvironment) -> None:
     file_name = "zero_length_file.file"
     await sandbox_env.exec(["touch", file_name])
     zero_length = await sandbox_env.read_file(file_name, text=True)
-    assert isinstance(zero_length, str)
-    assert zero_length == ""
+    assert isinstance(zero_length, str), (
+        f"Zero-length file should return a string, got {type(zero_length)}"
+    )
+    assert zero_length == "", (
+        f"Zero-length file should be an empty string, got {zero_length=}"
+    )
     await _cleanup_file(sandbox_env, file_name)
@@ -154,7 +174,10 @@ async def test_read_file_not_found(sandbox_env: SandboxEnvironment) -> None:
     file_name = "nonexistent"
     with Raises(FileNotFoundError) as e_info:
         await sandbox_env.read_file(file_name, text=True)
-    assert file_name in str(e_info.value)
+    assert e_info is not None, "FileNotFoundError should be raised"
+    assert file_name in str(e_info.value), (
+        f"FileNotFoundError should contain the filename, got {e_info.value=}"
+    )
 async def test_read_file_not_allowed(sandbox_env: SandboxEnvironment) -> None:
@@ -163,7 +186,10 @@ async def test_read_file_not_allowed(sandbox_env: SandboxEnvironment) -> None:
     await sandbox_env.exec(["chmod", "-r", file_name])
     with Raises(PermissionError) as e_info:
         await sandbox_env.read_file(file_name, text=True)
-    assert file_name in str(e_info.value)
+    assert e_info is not None, "PermissionError should be raised"
+    assert file_name in str(e_info.value), (
+        f"PermissionError should contain the filename, got {e_info.value=}"
+    )
     await sandbox_env.exec(["chmod", "+r", file_name])
     await _cleanup_file(sandbox_env, file_name)
@@ -172,7 +198,10 @@ async def test_read_file_is_directory(sandbox_env: SandboxEnvironment) -> None:
     file_name = "/etc"
     with Raises(IsADirectoryError) as e_info:
         await sandbox_env.read_file(file_name, text=True)
-    assert "directory" in str(e_info.value)
+        assert e_info is not None, "IsADirectoryError should be raised"
+    assert "directory" in str(e_info.value), (
+        f"IsADirectoryError should mention 'directory', got {e_info.value=}"
+    )
 async def test_read_file_nonsense_name(
@@ -181,7 +210,10 @@ async def test_read_file_nonsense_name(
     file_name = "https:/en.wikipedia.org/wiki/Bart%C5%82omiej_Kasprzykowski"
     with Raises(FileNotFoundError) as e_info:
         await sandbox_env.read_file(file_name, text=True)
-    assert "wikipedia" in str(e_info.value)
+    assert e_info is not None, "FileNotFoundError should be raised"
+    assert "wikipedia" in str(e_info.value), (
+        f"FileNotFoundError should contain the filename, got {e_info.value=}"
+    )
 async def test_read_file_limit(sandbox_env: SandboxEnvironment) -> None:
@@ -191,7 +223,10 @@ async def test_read_file_limit(sandbox_env: SandboxEnvironment) -> None:
     with mock.patch.object(SandboxEnvironmentLimits, "MAX_READ_FILE_SIZE", 1024):
         with Raises(OutputLimitExceededError) as e_info:
             await sandbox_env.read_file(file_name, text=True)
-        assert "limit of 100 MiB was exceeded" in str(e_info.value)
+    assert e_info is not None, "OutputLimitExceededError should be raised"
+    assert "limit of 100 MiB was exceeded" in str(e_info.value), (
+        f"OutputLimitExceededError should mention the limit, got {e_info.value=}"
+    )
     await _cleanup_file(sandbox_env, file_name)
@@ -199,8 +234,12 @@ async def test_write_text_file_zero_length(sandbox_env: SandboxEnvironment) -> N
     file_name = "zero_length_file.file"
     await sandbox_env.write_file(file_name, "")
     zero_length = await sandbox_env.read_file(file_name, text=True)
-    assert isinstance(zero_length, str)
-    assert zero_length == ""
+    assert isinstance(zero_length, str), (
+        f"Zero-length file should return a string, got {type(zero_length)}"
+    )
+    assert zero_length == "", (
+        f"Zero-length file should be an empty string, got {zero_length=}"
+    )
     await _cleanup_file(sandbox_env, file_name)
@@ -209,8 +248,12 @@ async def test_write_text_file_space(sandbox_env: SandboxEnvironment) -> None:
     file_name = "file with space.file"
     await sandbox_env.write_file(file_name, space)
     file_with_space = await sandbox_env.read_file(file_name, text=True)
-    assert isinstance(file_with_space, str)
-    assert file_with_space == space
+    assert isinstance(file_with_space, str), (
+        f"File with space should return a string, got {type(file_with_space)}"
+    )
+    assert file_with_space == space, (
+        f"File with space content should match, got {file_with_space=}; expected {space=}"
+    )
     await _cleanup_file(sandbox_env, file_name)
@@ -226,7 +269,10 @@ async def test_write_text_file_is_directory(
             "/tmp/inspect_ai_test_write_text_file_is_directory",
             "content cannot go in a directory, dummy",
         )
-    assert "directory" in str(e_info.value)
+    assert e_info is not None, "IsADirectoryError should be raised"
+    assert "directory" in str(e_info.value), (
+        f"IsADirectoryError should mention 'directory', got {e_info.value=}"
+    )
     await sandbox_env.exec(
         ["rm", "-rf", "/tmp/inspect_ai_test_write_text_file_is_directory"]
     )
@@ -240,7 +286,10 @@ async def test_write_text_file_without_permissions(
     await sandbox_env.exec(["chmod", "-w", file_name])
     with Raises(PermissionError) as e_info:
         await sandbox_env.write_file(file_name, "this won't stick")
-    assert file_name in str(e_info.value)
+    assert e_info is not None, "PermissionError should be raised"
+    assert file_name in str(e_info.value), (
+        f"PermissionError should contain the filename, got {e_info.value=}"
+    )
     await sandbox_env.exec(["chmod", "+w", file_name])
     await _cleanup_file(sandbox_env, file_name)
@@ -252,7 +301,9 @@ async def test_write_text_file_exists(
     await sandbox_env.write_file(file_name, "mundane content")
     await sandbox_env.write_file(file_name, "altered content")
     altered_content = await sandbox_env.read_file(file_name, text=True)
-    assert altered_content == "altered content"
+    assert altered_content == "altered content", (
+        f"Existing file content should be overwritten, got {altered_content=}"
+    )
     await _cleanup_file(sandbox_env, file_name)
@@ -260,8 +311,12 @@ async def test_write_binary_file_zero_length(sandbox_env: SandboxEnvironment) ->
     file_name = "zero_length_file.file"
     await sandbox_env.write_file(file_name, b"")
     zero_length = await sandbox_env.read_file(file_name, text=False)
-    assert isinstance(zero_length, bytes)
-    assert zero_length == b""
+    assert isinstance(zero_length, bytes), (
+        f"Zero-length file should return bytes, got {type(zero_length)}"
+    )
+    assert zero_length == b"", (
+        f"Zero-length file should be empty bytes, got {zero_length=}"
+    )
     await _cleanup_file(sandbox_env, file_name)
@@ -270,8 +325,10 @@ async def test_write_binary_file_space(sandbox_env: SandboxEnvironment) -> None:
     file_name = "file with space.file"
     await sandbox_env.write_file(file_name, binary_content)
     file_with_space = await sandbox_env.read_file(file_name, text=False)
-    assert isinstance(file_with_space, bytes)
-    assert file_with_space == binary_content
+    assert isinstance(file_with_space, bytes), (
+        f"File with space should return bytes, got {type(file_with_space)}"
+    )
+    assert file_with_space == binary_content, "File with space content should match"
     await _cleanup_file(sandbox_env, file_name)
@@ -287,7 +344,10 @@ async def test_write_binary_file_is_directory(
             "/tmp/inspect_ai_test_write_binary_file_is_directory",
             b"\xc3\x28",
         )
-    assert "directory" in str(e_info.value)
+    assert e_info is not None, "IsADirectoryError should be raised"
+    assert "directory" in str(e_info.value), (
+        f"IsADirectoryError should mention 'directory', got {e_info.value=}"
+    )
     await sandbox_env.exec(
         ["rm", "-rf", "/tmp/inspect_ai_test_write_binary_file_is_directory"]
     )
@@ -301,7 +361,10 @@ async def test_write_binary_file_without_permissions(
     await sandbox_env.exec(["chmod", "-w", file_name])
     with Raises(PermissionError) as e_info:
         await sandbox_env.write_file(file_name, b"\xc3\x28")
-    assert file_name in str(e_info.value)
+    assert e_info is not None, "PermissionError should be raised"
+    assert file_name in str(e_info.value), (
+        f"PermissionError should contain the filename, got {e_info.value=}"
+    )
     await sandbox_env.exec(["chmod", "+w", file_name])
     await _cleanup_file(sandbox_env, file_name)
@@ -313,7 +376,7 @@ async def test_write_binary_file_exists(
     await sandbox_env.write_file(file_name, b"\xc3\x28")
     await sandbox_env.write_file(file_name, b"\xc3\x29")
     altered_content = await sandbox_env.read_file(file_name, text=False)
-    assert altered_content == b"\xc3\x29"
+    assert altered_content == b"\xc3\x29", "Existing file content should be overwritten"
     await _cleanup_file(sandbox_env, file_name)
@@ -328,12 +391,16 @@ async def test_exec_output(sandbox_env: SandboxEnvironment) -> None:
 async def test_exec_stderr(sandbox_env: SandboxEnvironment) -> None:
     exec_result = await sandbox_env.exec(["sh", "-c", "echo boof; echo baz >&2"])
-    assert exec_result.stderr == "baz\n"
+    assert exec_result.stderr == "baz\n", (
+        f"stderr output should match; got {exec_result.stderr=}, expected 'baz\n'"
+    )
 async def test_exec_returncode(sandbox_env: SandboxEnvironment) -> None:
     exec_result = await sandbox_env.exec(["sh", "-c", "echo foo; exit 70"])
-    assert exec_result.returncode == 70
+    assert exec_result.returncode == 70, (
+        f"Return code should match, got {exec_result.returncode=}, expected 70"
+    )
 async def test_exec_timeout(sandbox_env: SandboxEnvironment) -> None:
@@ -391,13 +458,13 @@ async def test_exec_as_user(sandbox_env: SandboxEnvironment) -> None:
 async def test_exec_as_nonexistent_user(sandbox_env: SandboxEnvironment) -> None:
-    result = await sandbox_env.exec(["whoami"], user="nonexistent")
+    nonexistent_username = "nonexistent"
+    result = await sandbox_env.exec(["whoami"], user=nonexistent_username)
     assert not result.success, "Command should have failed for nonexistent user"
-    expected_error = (
-        "unable to find user nonexistent: no matching entries in passwd file"
-    )
-    assert expected_error in result.stdout, (
-        f"Error string '{expected_error}' not found in error output: '{result.stdout}'"
+    assert (
+        nonexistent_username in result.stdout or nonexistent_username in result.stderr
+    ), (
+        f"Error not found in command output: '{result.stdout}' nor stderr '{result.stderr}"
     )
@@ -405,13 +472,17 @@ async def test_cwd_unspecified(sandbox_env: SandboxEnvironment) -> None:
     file_name = "test_cwd_unspecified.file"
     await sandbox_env.write_file(file_name, "ls me plz")
     current_dir_contents = (await sandbox_env.exec(["ls", "-1"])).stdout
-    assert file_name in current_dir_contents
+    assert file_name in current_dir_contents, (
+        f"File should be in current directory contents; got {current_dir_contents=}"
+    )
     await _cleanup_file(sandbox_env, file_name)
 async def test_cwd_custom(sandbox_env: SandboxEnvironment) -> None:
     current_dir_contents = (await sandbox_env.exec(["ls"], cwd="/usr/bin")).stdout
-    assert "env" in current_dir_contents
+    assert "env" in current_dir_contents, (
+        f"env should be in /usr/bin; got {current_dir_contents=}"
+    )
 async def test_cwd_relative(sandbox_env: SandboxEnvironment) -> None:
@@ -433,7 +504,9 @@ async def test_cwd_absolute(sandbox_env: SandboxEnvironment) -> None:
     file_name = "/tmp/test_cwd_absolute/test_cwd_absolute.file"
     await sandbox_env.write_file(file_name, "ls me plz")
     current_dir_contents = (await sandbox_env.exec(["ls"], cwd=cwd_directory)).stdout
-    assert "test_cwd_absolute.file" in current_dir_contents
+    assert "test_cwd_absolute.file" in current_dir_contents, (
+        f"File should be in current directory contents, got {current_dir_contents=}"
+    )
     await _cleanup_file(sandbox_env, file_name)
     await sandbox_env.exec(["rmdir", cwd_directory])
@@ -442,20 +515,35 @@ async def test_exec_stdout_is_limited(sandbox_env: SandboxEnvironment) -> None:
     output_size = 10 * 1024**2 + 1024  # 10 MiB + 1 KiB
     with pytest.raises(OutputLimitExceededError) as e_info:
         await sandbox_env.exec(["sh", "-c", f"yes | head -c {output_size}"])
-    assert "limit of 10 MiB was exceeded" in str(e_info.value)
+    assert e_info is not None, "OutputLimitExceededError should be raised"
+    assert "limit of 10 MiB was exceeded" in str(e_info.value), (
+        "OutputLimitExceededError should mention the limit; got {e_info.value=}"
+    )
     truncated_output = e_info.value.truncated_output
     # `yes` outputs 'y\n' (ASCII) so the size equals the string length.
     # some shells additionally output 'canceled\n' so we add fudge factor for that
-    assert truncated_output and (len(truncated_output) - 10 * 1024**2) < 10
+    assert truncated_output and (len(truncated_output) - 10 * 1024**2) < 10, (
+        f"output not truncated or wrong length; start of truncated output = {'' if not truncated_output else truncated_output[:10]}; len(truncated_output): {'n/a' if not truncated_output else len(truncated_output)}"
+    )
 async def test_exec_stderr_is_limited(sandbox_env: SandboxEnvironment) -> None:
     output_size = 10 * 1024**2 + 1024  # 10 MiB + 1 KiB
     with pytest.raises(OutputLimitExceededError) as e_info:
         await sandbox_env.exec(["sh", "-c", f"yes | head -c {output_size} 1>&2"])
-    assert "limit of 10 MiB was exceeded" in str(e_info.value)
+    assert e_info is not None, "OutputLimitExceededError should be raised"
+    assert "limit of 10 MiB was exceeded" in str(e_info.value), (
+        "OutputLimitExceededError should mention the limit; got {e_info.value=}"
+    )
     truncated_output = e_info.value.truncated_output
-    assert truncated_output and len(truncated_output) == 10 * 1024**2
+    assert (
+        truncated_output
+        and truncated_output[0] == "y"
+        and len(truncated_output) <= 10 * 1024**2
+        and len(truncated_output) > 0
+    ), (
+        f"output not truncated or wrong length; start of truncated output = {'' if not truncated_output else truncated_output[:10]}; len(truncated_output): {'n/a' if not truncated_output else len(truncated_output)}"
+    )
 # TODO: write a test for when cwd doesn't exist

inspect_ai/util/_subtask.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import asyncio
 import inspect
+from datetime import datetime
 from functools import wraps
 from logging import getLogger
 from typing import (
@@ -15,6 +16,7 @@ from typing import (
 from inspect_ai._util._async import is_callable_coroutine
 from inspect_ai._util.content import Content
 from inspect_ai._util.trace import trace_action
+from inspect_ai._util.working import sample_waiting_time
 from inspect_ai.util._store import Store, dict_jsonable, init_subtask_store
 SubtaskResult = str | int | float | bool | list[Content]
@@ -130,6 +132,7 @@ def subtask(
                 return result, list(transcript().events)
             # create subtask event
+            waiting_time_start = sample_waiting_time()
             event = SubtaskEvent(
                 name=subtask_name, input=log_input, type=type, pending=True
             )
@@ -139,6 +142,14 @@ def subtask(
             asyncio_task = asyncio.create_task(run())
             result, events = await asyncio_task
+            # time accounting
+            completed = datetime.now()
+            waiting_time_end = sample_waiting_time()
+            event.completed = completed
+            event.working_time = (completed - event.timestamp).total_seconds() - (
+                waiting_time_end - waiting_time_start
+            )
             # update event
             event.result = result
             event.events = events

{inspect_ai-0.3.70.dist-info → inspect_ai-0.3.71.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: inspect_ai
-Version: 0.3.70
+Version: 0.3.71
 Summary: Framework for large language model evaluations
 Author: UK AI Security Institute
 License: MIT License

inspect-ai 0.3.70__py3-none-any.whl → 0.3.71__py3-none-any.whl

inspect-ai 0.3.70__py3-none-any.whl → 0.3.71__py3-none-any.whl