inspect-ai 0.3.71__py3-none-any.whl → 0.3.73__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. inspect_ai/_cli/eval.py +14 -3
  2. inspect_ai/_cli/sandbox.py +3 -3
  3. inspect_ai/_cli/score.py +6 -4
  4. inspect_ai/_cli/trace.py +53 -6
  5. inspect_ai/_display/core/config.py +1 -1
  6. inspect_ai/_display/core/display.py +2 -1
  7. inspect_ai/_display/core/footer.py +6 -6
  8. inspect_ai/_display/plain/display.py +11 -6
  9. inspect_ai/_display/rich/display.py +23 -13
  10. inspect_ai/_display/textual/app.py +10 -9
  11. inspect_ai/_display/textual/display.py +2 -2
  12. inspect_ai/_display/textual/widgets/footer.py +4 -0
  13. inspect_ai/_display/textual/widgets/samples.py +14 -5
  14. inspect_ai/_eval/context.py +1 -2
  15. inspect_ai/_eval/eval.py +54 -41
  16. inspect_ai/_eval/loader.py +9 -2
  17. inspect_ai/_eval/run.py +148 -81
  18. inspect_ai/_eval/score.py +13 -8
  19. inspect_ai/_eval/task/images.py +31 -21
  20. inspect_ai/_eval/task/run.py +62 -59
  21. inspect_ai/_eval/task/rundir.py +16 -9
  22. inspect_ai/_eval/task/sandbox.py +7 -8
  23. inspect_ai/_eval/task/util.py +7 -0
  24. inspect_ai/_util/_async.py +118 -10
  25. inspect_ai/_util/constants.py +0 -2
  26. inspect_ai/_util/file.py +15 -29
  27. inspect_ai/_util/future.py +37 -0
  28. inspect_ai/_util/http.py +3 -99
  29. inspect_ai/_util/httpx.py +60 -0
  30. inspect_ai/_util/interrupt.py +2 -2
  31. inspect_ai/_util/json.py +5 -52
  32. inspect_ai/_util/logger.py +30 -86
  33. inspect_ai/_util/retry.py +10 -61
  34. inspect_ai/_util/trace.py +2 -2
  35. inspect_ai/_view/server.py +86 -3
  36. inspect_ai/_view/www/dist/assets/index.js +25837 -13269
  37. inspect_ai/_view/www/log-schema.json +253 -186
  38. inspect_ai/_view/www/package.json +2 -2
  39. inspect_ai/_view/www/src/plan/PlanDetailView.tsx +8 -3
  40. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +2 -3
  41. inspect_ai/_view/www/src/types/log.d.ts +122 -94
  42. inspect_ai/approval/_human/manager.py +6 -10
  43. inspect_ai/approval/_human/panel.py +2 -2
  44. inspect_ai/dataset/_sources/util.py +7 -6
  45. inspect_ai/log/__init__.py +4 -0
  46. inspect_ai/log/_file.py +35 -61
  47. inspect_ai/log/_log.py +18 -1
  48. inspect_ai/log/_recorders/eval.py +14 -23
  49. inspect_ai/log/_recorders/json.py +3 -18
  50. inspect_ai/log/_samples.py +27 -2
  51. inspect_ai/log/_transcript.py +8 -8
  52. inspect_ai/model/__init__.py +2 -1
  53. inspect_ai/model/_call_tools.py +60 -40
  54. inspect_ai/model/_chat_message.py +3 -2
  55. inspect_ai/model/_generate_config.py +25 -0
  56. inspect_ai/model/_model.py +74 -36
  57. inspect_ai/model/_openai.py +9 -1
  58. inspect_ai/model/_providers/anthropic.py +172 -154
  59. inspect_ai/model/_providers/azureai.py +11 -9
  60. inspect_ai/model/_providers/bedrock.py +33 -24
  61. inspect_ai/model/_providers/cloudflare.py +8 -9
  62. inspect_ai/model/_providers/goodfire.py +7 -3
  63. inspect_ai/model/_providers/google.py +47 -13
  64. inspect_ai/model/_providers/groq.py +15 -15
  65. inspect_ai/model/_providers/hf.py +24 -17
  66. inspect_ai/model/_providers/mistral.py +36 -20
  67. inspect_ai/model/_providers/openai.py +30 -25
  68. inspect_ai/model/_providers/openai_o1.py +1 -1
  69. inspect_ai/model/_providers/providers.py +1 -1
  70. inspect_ai/model/_providers/together.py +3 -4
  71. inspect_ai/model/_providers/util/__init__.py +2 -2
  72. inspect_ai/model/_providers/util/chatapi.py +6 -19
  73. inspect_ai/model/_providers/util/hooks.py +165 -0
  74. inspect_ai/model/_providers/vertex.py +20 -3
  75. inspect_ai/model/_providers/vllm.py +16 -19
  76. inspect_ai/scorer/_multi.py +5 -2
  77. inspect_ai/solver/_bridge/patch.py +31 -1
  78. inspect_ai/solver/_fork.py +5 -3
  79. inspect_ai/solver/_human_agent/agent.py +3 -2
  80. inspect_ai/tool/__init__.py +8 -2
  81. inspect_ai/tool/_tool_info.py +4 -90
  82. inspect_ai/tool/_tool_params.py +4 -34
  83. inspect_ai/tool/_tools/_computer/_common.py +117 -58
  84. inspect_ai/tool/_tools/_computer/_computer.py +80 -57
  85. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +7 -1
  86. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml +91 -0
  87. inspect_ai/tool/_tools/_computer/_resources/tool/.pylintrc +8 -0
  88. inspect_ai/tool/_tools/_computer/_resources/tool/.vscode/settings.json +12 -0
  89. inspect_ai/tool/_tools/_computer/_resources/tool/_args.py +78 -0
  90. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +20 -0
  91. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +175 -113
  92. inspect_ai/tool/_tools/_computer/_resources/tool/computer_tool.py +76 -20
  93. inspect_ai/tool/_tools/_computer/_resources/tool/pyproject.toml +65 -0
  94. inspect_ai/tool/_tools/_computer/test_args.py +151 -0
  95. inspect_ai/tool/_tools/_web_search.py +30 -24
  96. inspect_ai/util/__init__.py +4 -0
  97. inspect_ai/util/_concurrency.py +5 -6
  98. inspect_ai/util/_display.py +6 -0
  99. inspect_ai/util/_json.py +170 -0
  100. inspect_ai/util/_sandbox/docker/cleanup.py +13 -9
  101. inspect_ai/util/_sandbox/docker/docker.py +5 -0
  102. inspect_ai/util/_sandbox/environment.py +56 -9
  103. inspect_ai/util/_sandbox/service.py +12 -5
  104. inspect_ai/util/_subprocess.py +94 -113
  105. inspect_ai/util/_subtask.py +2 -4
  106. {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/METADATA +6 -2
  107. {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/RECORD +111 -103
  108. {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/WHEEL +1 -1
  109. inspect_ai/_util/timeouts.py +0 -160
  110. inspect_ai/model/_providers/util/tracker.py +0 -92
  111. inspect_ai/tool/_tools/_computer/_computer_split.py +0 -198
  112. {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/LICENSE +0 -0
  113. {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/entry_points.txt +0 -0
  114. {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/top_level.txt +0 -0
@@ -1,160 +0,0 @@
1
- # based on: https://github.com/python/cpython/commits/main/Lib/asyncio/timeouts.py
2
- # (vendored for compatibility with Python 3.10, which lacks asyncio.timeout)
3
-
4
- import enum
5
- from asyncio import events, exceptions, tasks
6
- from contextlib import _AsyncGeneratorContextManager
7
- from types import TracebackType
8
- from typing import Any, Optional, Type, final
9
-
10
- __all__ = (
11
- "Timeout",
12
- "timeout",
13
- "timeout_at",
14
- )
15
-
16
-
17
- class _State(enum.Enum):
18
- CREATED = "created"
19
- ENTERED = "active"
20
- EXPIRING = "expiring"
21
- EXPIRED = "expired"
22
- EXITED = "finished"
23
-
24
-
25
- @final
26
- class Timeout(_AsyncGeneratorContextManager["Timeout"]):
27
- """Asynchronous context manager for cancelling overdue coroutines.
28
-
29
- Use `timeout()` or `timeout_at()` rather than instantiating this class directly.
30
- """
31
-
32
- def __init__(self, when: Optional[float]) -> None:
33
- """Schedule a timeout that will trigger at a given loop time.
34
-
35
- - If `when` is `None`, the timeout will never trigger.
36
- - If `when < loop.time()`, the timeout will trigger on the next
37
- iteration of the event loop.
38
- """
39
- self._state = _State.CREATED
40
-
41
- self._timeout_handler: Optional[events.Handle] = None
42
- self._task: Optional[tasks.Task[Any]] = None
43
- self._when = when
44
-
45
- def when(self) -> Optional[float]:
46
- """Return the current deadline."""
47
- return self._when
48
-
49
- def reschedule(self, when: Optional[float]) -> None:
50
- """Reschedule the timeout."""
51
- assert self._state is not _State.CREATED
52
- if self._state is not _State.ENTERED:
53
- raise RuntimeError(
54
- f"Cannot change state of {self._state.value} Timeout",
55
- )
56
-
57
- self._when = when
58
-
59
- if self._timeout_handler is not None:
60
- self._timeout_handler.cancel()
61
-
62
- if when is None:
63
- self._timeout_handler = None
64
- else:
65
- loop = events.get_running_loop()
66
- if when <= loop.time():
67
- self._timeout_handler = loop.call_soon(self._on_timeout)
68
- else:
69
- self._timeout_handler = loop.call_at(when, self._on_timeout)
70
-
71
- def expired(self) -> bool:
72
- """Is timeout expired during execution?"""
73
- return self._state in (_State.EXPIRING, _State.EXPIRED)
74
-
75
- def __repr__(self) -> str:
76
- info = [""]
77
- if self._state is _State.ENTERED:
78
- when = round(self._when, 3) if self._when is not None else None
79
- info.append(f"when={when}")
80
- info_str = " ".join(info)
81
- return f"<Timeout [{self._state.value}]{info_str}>"
82
-
83
- async def __aenter__(self) -> "Timeout":
84
- self._state = _State.ENTERED
85
- self._task = tasks.current_task()
86
- if self._task is None:
87
- raise RuntimeError("Timeout should be used inside a task")
88
- self.reschedule(self._when)
89
- return self
90
-
91
- async def __aexit__(
92
- self,
93
- exc_type: Optional[Type[BaseException]],
94
- exc_val: Optional[BaseException],
95
- exc_tb: Optional[TracebackType],
96
- ) -> Optional[bool]:
97
- assert self._state in (_State.ENTERED, _State.EXPIRING)
98
-
99
- if self._timeout_handler is not None:
100
- self._timeout_handler.cancel()
101
- self._timeout_handler = None
102
-
103
- if self._state is _State.EXPIRING:
104
- self._state = _State.EXPIRED
105
- if exc_type and issubclass(exc_type, exceptions.CancelledError):
106
- raise TimeoutError from exc_val
107
- elif self._state is _State.ENTERED:
108
- self._state = _State.EXITED
109
-
110
- return None
111
-
112
- def _on_timeout(self) -> None:
113
- assert self._state is _State.ENTERED
114
- if self._task:
115
- self._task.cancel()
116
- self._state = _State.EXPIRING
117
- # drop the reference early
118
- self._timeout_handler = None
119
-
120
-
121
- def timeout(delay: Optional[float]) -> Timeout:
122
- """Timeout async context manager.
123
-
124
- Useful in cases when you want to apply timeout logic around block
125
- of code or in cases when asyncio.wait_for is not suitable. For example:
126
-
127
- >>> async with asyncio.timeout(10): # 10 seconds timeout
128
- ... await long_running_task()
129
-
130
-
131
- delay - value in seconds or None to disable timeout logic
132
-
133
- long_running_task() is interrupted by raising asyncio.CancelledError,
134
- the top-most affected timeout() context manager converts CancelledError
135
- into TimeoutError.
136
- """
137
- loop = events.get_running_loop()
138
- return Timeout(loop.time() + delay if delay is not None else None)
139
-
140
-
141
- def timeout_at(when: Optional[float]) -> Timeout:
142
- """Schedule the timeout at absolute time.
143
-
144
- Like timeout() but argument gives absolute time in the same clock system
145
- as loop.time().
146
-
147
- Please note: it is not POSIX time but a time with
148
- undefined starting base, e.g. the time of the system power on.
149
-
150
- >>> async with asyncio.timeout_at(loop.time() + 10):
151
- ... await long_running_task()
152
-
153
-
154
- when - a deadline when timeout occurs or None to disable timeout logic
155
-
156
- long_running_task() is interrupted by raising asyncio.CancelledError,
157
- the top-most affected timeout() context manager converts CancelledError
158
- into TimeoutError.
159
- """
160
- return Timeout(when)
@@ -1,92 +0,0 @@
1
- import re
2
- import time
3
- from typing import Any, cast
4
-
5
- import httpx
6
- from shortuuid import uuid
7
-
8
-
9
- class HttpTimeTracker:
10
- def __init__(self) -> None:
11
- # track request start times
12
- self._requests: dict[str, float] = {}
13
-
14
- def start_request(self) -> str:
15
- request_id = uuid()
16
- self._requests[request_id] = time.monotonic()
17
- return request_id
18
-
19
- def end_request(self, request_id: str) -> float:
20
- # read the request time if (if available) and purge from dict
21
- request_time = self._requests.pop(request_id, None)
22
- if request_time is None:
23
- raise RuntimeError(f"request_id not registered: {request_id}")
24
-
25
- # return elapsed time
26
- return time.monotonic() - request_time
27
-
28
- def update_request_time(self, request_id: str) -> None:
29
- request_time = self._requests.get(request_id, None)
30
- if not request_time:
31
- raise RuntimeError(f"No request registered for request_id: {request_id}")
32
-
33
- # update the request time
34
- self._requests[request_id] = time.monotonic()
35
-
36
-
37
- class BotoTimeTracker(HttpTimeTracker):
38
- def __init__(self, session: Any) -> None:
39
- from aiobotocore.session import AioSession
40
-
41
- super().__init__()
42
-
43
- # register hook
44
- session = cast(AioSession, session._session)
45
- session.register(
46
- "before-send.bedrock-runtime.Converse", self.converse_before_send
47
- )
48
-
49
- def converse_before_send(self, **kwargs: Any) -> None:
50
- user_agent = kwargs["request"].headers["User-Agent"].decode()
51
- match = re.search(rf"{self.USER_AGENT_PREFIX}(\w+)", user_agent)
52
- if match:
53
- request_id = match.group(1)
54
- self.update_request_time(request_id)
55
-
56
- def user_agent_extra(self, request_id: str) -> str:
57
- return f"{self.USER_AGENT_PREFIX}{request_id}"
58
-
59
- USER_AGENT_PREFIX = "ins/rid#"
60
-
61
-
62
- class HttpxTimeTracker(HttpTimeTracker):
63
- """Class which tracks the duration of successful (200 status) http requests.
64
-
65
- A special header is injected into requests which is then read from
66
- an httpx 'request' event hook -- this creates a record of when the request
67
- started. Note that with retries a single request id could be started
68
- several times; our request hook makes sure we always track the time of
69
- the last request.
70
-
71
- To determine the total time, we also install an httpx response hook. In
72
- this hook we look for 200 responses which have a registered request id.
73
- When we find one, we update the end time of the request.
74
-
75
- There is an 'end_request()' method which gets the total requeset time
76
- for a request_id and then purges the request_id from our tracking (so
77
- the dict doesn't grow unbounded)
78
- """
79
-
80
- REQUEST_ID_HEADER = "x-irid"
81
-
82
- def __init__(self, client: httpx.AsyncClient):
83
- super().__init__()
84
-
85
- # install httpx request hook
86
- client.event_hooks["request"].append(self.request_hook)
87
-
88
- async def request_hook(self, request: httpx.Request) -> None:
89
- # update the last request time for this request id (as there could be retries)
90
- request_id = request.headers.get(self.REQUEST_ID_HEADER, None)
91
- if request_id:
92
- self.update_request_time(request_id)
@@ -1,198 +0,0 @@
1
- """
2
- This module provides the same functionality as the computer tool but via a list of per-action tools, e.g. computer_mouse_move(100, 100).
3
-
4
- The split version is not publicly exported, but is retained until we decide if it performs better than the monolithic computer tool.
5
- """
6
-
7
- from typing import Awaitable, Callable
8
-
9
- from inspect_ai.tool import Tool, ToolResult, tool
10
-
11
- from . import _common as common
12
-
13
- ActionFunction = Callable[[str], ToolResult | Awaitable[ToolResult]]
14
-
15
-
16
- def computer_split(timeout: int | None = None) -> list[Tool]:
17
- """
18
- Computer interaction tools.
19
-
20
- Args:
21
- timeout (int | None): Timeout (in seconds) for command.
22
-
23
- Returns:
24
- List of computer interaction tools.
25
- """
26
- return [
27
- computer_cursor_position(),
28
- computer_screenshot(),
29
- computer_mouse_move(),
30
- computer_left_click(),
31
- computer_double_click(),
32
- computer_left_click_drag(),
33
- computer_right_click(),
34
- computer_key(),
35
- computer_type(),
36
- ]
37
-
38
-
39
- @tool()
40
- def computer_cursor_position(timeout: int | None = None) -> Tool:
41
- async def execute() -> ToolResult:
42
- """
43
- Get the current (x, y) pixel coordinate of the cursor on the screen.
44
-
45
- Args:
46
- None
47
-
48
- Returns:
49
- A `str` of the form "x y" where x and y are the current mouse coordinates.
50
- """
51
- return await common.cursor_position(timeout=timeout)
52
-
53
- return execute
54
-
55
-
56
- @tool()
57
- def computer_screenshot(timeout: int | None = None) -> Tool:
58
- async def execute() -> ToolResult:
59
- """
60
- Take a screenshot.
61
-
62
- Args:
63
- None
64
-
65
- Returns:
66
- A `list` with a single `ContentImage` of the screen.
67
- """
68
- return await common.screenshot(timeout=timeout)
69
-
70
- return execute
71
-
72
-
73
- @tool()
74
- def computer_mouse_move(timeout: int | None = None) -> Tool:
75
- async def execute(x: int, y: int) -> ToolResult:
76
- """
77
- Move the cursor to a specified (x, y) pixel coordinate on the screen.
78
-
79
- Args:
80
- x: X coordinate of the mouse destination.
81
- y: Y coordinate of the mouse destination.
82
-
83
- Returns:
84
- A `list` with a single `ContentImage` of the screen.
85
- """
86
- return await common.mouse_move(x, y, timeout=timeout)
87
-
88
- return execute
89
-
90
-
91
- @tool()
92
- def computer_left_click(timeout: int | None = None) -> Tool:
93
- async def execute() -> ToolResult:
94
- """
95
- Click the left mouse button.
96
-
97
- Args:
98
- None
99
-
100
- Returns:
101
- A `list` with a single `ContentImage` of the screen.
102
- """
103
- return await common.left_click(timeout=timeout)
104
-
105
- return execute
106
-
107
-
108
- @tool()
109
- def computer_double_click(timeout: int | None = None) -> Tool:
110
- async def execute() -> ToolResult:
111
- """
112
- Double-click the left mouse button.
113
-
114
- Args:
115
- None
116
-
117
- Returns:
118
- A `list` with a single `ContentImage` of the screen.
119
- """
120
- return await common.double_click(timeout=timeout)
121
-
122
- return execute
123
-
124
-
125
- @tool()
126
- def computer_left_click_drag(timeout: int | None = None) -> Tool:
127
- async def execute(x: int, y: int) -> ToolResult:
128
- """
129
- Click and drag the cursor to a specified (x, y) pixel coordinate on the screen.
130
-
131
- Args:
132
- x: X coordinate of the mouse destination.
133
- y: Y coordinate of the mouse destination.
134
-
135
- Returns:
136
- A `list` with a single `ContentImage` of the screen.
137
- """
138
- return await common.left_click_drag(x, y, timeout=timeout)
139
-
140
- return execute
141
-
142
-
143
- @tool()
144
- def computer_right_click(timeout: int | None = None) -> Tool:
145
- async def execute() -> ToolResult:
146
- """
147
- Click the right mouse button.
148
-
149
- Args:
150
- None
151
-
152
- Returns:
153
- A `list` with a single `ContentImage` of the screen.
154
- """
155
- return await common.right_click(timeout=timeout)
156
-
157
- return execute
158
-
159
-
160
- # keysym list is from https://gist.github.com/rvaiya/be31f42049a4b5ad46666a8e120d9843
161
- @tool()
162
- def computer_key(timeout: int | None = None) -> Tool:
163
- async def execute(key: str) -> ToolResult:
164
- """
165
- Press a key or key-combination on the keyboard.
166
-
167
- Args:
168
- key: The key or key-combination to press. Can be any key name supported by xdotool's `key` such as:
169
- "Return", "Escape", "alt+Tab", "BackSpace", "Tab", "alt+Tab", "ctrl+s", "Up", "KP_0" (for the numpad 0 key),
170
- "Insert", "Delete", "Home", "End", "Prior", "Next", "Left", "Up", "Right", "Down",
171
- "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "F10", "F11", "F12",
172
- "Shift_L", "Shift_R", "Control_L", "Control_R", "Alt_L", "Alt_R", "Scroll_Lock", "Num_Lock", "Caps_Lock", "Pause",
173
- "KP_Multiply", "KP_Home", "KP_Up", "KP_Prior", "KP_Subtract", "KP_Left", "KP_Begin", "KP_Right", "KP_Add", "KP_End","KP_Down",
174
- "KP_Next", "KP_Insert", "KP_Delete", "KP_Enter", "KP_Divide", "KP_Equal", "KP_Decimal"
175
-
176
- Returns:
177
- A `list` with a single `ContentImage` of the screen.
178
- """
179
- return await common.press_key(key, timeout=timeout)
180
-
181
- return execute
182
-
183
-
184
- @tool()
185
- def computer_type(timeout: int | None = None) -> Tool:
186
- async def execute(text: str) -> ToolResult:
187
- """
188
- Type a string of text on the keyboard.
189
-
190
- Args:
191
- text: The text to type. If the text contains spaces, enclose it in quotes.
192
-
193
- Returns:
194
- A `list` with a single `ContentImage` of the screen.
195
- """
196
- return await common.type(text, timeout=timeout)
197
-
198
- return execute