inspect-ai 0.3.60__py3-none-any.whl → 0.3.61__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +0 -1
- inspect_ai/_eval/task/generate.py +41 -35
- inspect_ai/_eval/task/run.py +10 -4
- inspect_ai/_util/hooks.py +17 -7
- inspect_ai/_view/www/package.json +1 -1
- inspect_ai/model/_chat_message.py +2 -2
- inspect_ai/model/_model.py +90 -25
- inspect_ai/model/_providers/anthropic.py +0 -3
- inspect_ai/solver/__init__.py +2 -0
- inspect_ai/solver/_basic_agent.py +64 -54
- inspect_ai/{util → solver}/_limit.py +13 -0
- inspect_ai/solver/_task_state.py +37 -7
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +3 -1
- inspect_ai/tool/beta/_computer/_resources/Dockerfile +1 -3
- inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +1 -1
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +10 -0
- inspect_ai/util/__init__.py +0 -2
- inspect_ai/util/_sandbox/self_check.py +51 -28
- {inspect_ai-0.3.60.dist-info → inspect_ai-0.3.61.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.60.dist-info → inspect_ai-0.3.61.dist-info}/RECORD +24 -24
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +0 -10
- {inspect_ai-0.3.60.dist-info → inspect_ai-0.3.61.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.60.dist-info → inspect_ai-0.3.61.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.60.dist-info → inspect_ai-0.3.61.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.60.dist-info → inspect_ai-0.3.61.dist-info}/top_level.txt +0 -0
inspect_ai/_cli/eval.py
CHANGED
inspect_ai/_eval/task/generate.py
CHANGED
@@ -8,6 +8,7 @@ from inspect_ai.model import (
 )
 from inspect_ai.model._cache import epoch
 from inspect_ai.solver import TaskState
+from inspect_ai.solver._limit import SampleLimitExceededError
 from inspect_ai.tool import ToolFunction

@@ -21,45 +22,50 @@ async def task_generate(
     # track tool_choice (revert to "auto" after first forced call of a tool)
     tool_choice = state.tool_choice

+    try:
+        while True:
+            # If we don't update the epoch here as we go, it's entirely possible
+            # we'd cache the same response for every single epoch, which would
+            # completely defeat the point!
+            epoch.set(state.epoch)

+            # call the model
+            state.output = await model.generate(
+                input=state.messages,
+                tools=state.tools,
+                tool_choice=tool_choice,
+                config=config,
+                cache=cache,
+            )

+            # append the assistant message
+            message = state.output.message
+            state.messages.append(message)

+            # check for completed
+            if state.completed:
+                return state

+            # resolve tool calls if necessary
+            if tool_calls != "none" and message.tool_calls:
+                # call tools and append messages to state
+                state.messages.extend(
+                    await call_tools(message, state.tools, config.max_tool_output)
+                )

+                # check for completed or only executing a single tool call
+                if state.completed or tool_calls == "single":
+                    return state
+
+                # if a tool_call was forced set tool_choice to 'auto'
+                # (otherwise it will get forced over and over again)
+                if isinstance(tool_choice, ToolFunction):
+                    tool_choice = "auto"

-                tool_choice = "auto"
+            # no tool calls or not resolving tool calls, we are done!
+            else:
+                return state

+    # propagate current state along with sample limit exceeded
+    except SampleLimitExceededError as ex:
+        raise ex.with_state(state)
inspect_ai/_eval/task/run.py
CHANGED
@@ -75,9 +75,9 @@ from inspect_ai.scorer._scorer import unique_scorer_name
 from inspect_ai.solver import Generate, Plan, TaskState
 from inspect_ai.solver._chain import Chain, unroll
 from inspect_ai.solver._fork import set_task_generate
+from inspect_ai.solver._limit import SampleLimitExceededError
 from inspect_ai.solver._solver import Solver
 from inspect_ai.solver._task_state import sample_state, set_sample_state, state_jsonable
-from inspect_ai.util._limit import SampleLimitExceededError
 from inspect_ai.util._sandbox.context import sandbox_connections
 from inspect_ai.util._sandbox.environment import SandboxEnvironmentSpec
 from inspect_ai.util._subtask import init_subtask

@@ -402,7 +402,13 @@ async def task_run(options: TaskRunOptions) -> EvalLog:
     view_notify_eval(logger.location)

     try:
+        if (
+            await send_telemetry("eval_log_location", eval_log.location)
+            == "not_handled"
+        ):
+            # Converting the eval log to JSON is expensive. Only do so if
+            # eval_log_location was not handled.
+            await send_telemetry("eval_log", eval_log_json_str(eval_log))
     except Exception as ex:
         py_logger.warning(
             f"Error occurred sending telemetry: {exception_message(ex)}"

@@ -650,7 +656,7 @@ async def task_run_sample(
         )

         # capture most recent state for scoring
-        state = sample_state() or state
+        state = ex.state or sample_state() or state
         state.completed = True

     except BaseException as ex:

@@ -734,7 +740,7 @@ async def task_run_sample(
         error = handle_error(ex)

     # handle sandboxenv init errors
-    except
+    except Exception as ex:
         error = handle_error(ex)

     # complete the sample
inspect_ai/_util/hooks.py
CHANGED
@@ -17,19 +17,29 @@ from .error import PrerequisiteError
 #
 # Telemetry can be optionally enabled by setting an INSPECT_TELEMETRY
 # environment variable that points to a function in a package which
-# conforms to the TelemetrySend signature below.
+# conforms to the TelemetrySend signature below. A return value of True
+# indicates that the telemetry event was handled.

-# There are currently
-# - model_usage
-# -
+# There are currently three types of telemetry sent:
+# - model_usage (JSON string of the model usage)
+# - eval_log_location (file path or URL string of the eval log)
+# - eval_log (JSON string of the eval log)
+#   [only sent if eval_log_location unhandled]
+# The eval_log_location type is preferred over eval_log as it means we can take
+# advantage of the .eval format and avoid loading the whole log into memory.

-TelemetrySend = Callable[[str, str], Awaitable[
+TelemetrySend = Callable[[str, str], Awaitable[bool]]


-async def send_telemetry(
+async def send_telemetry(
+    type: Literal["model_usage", "eval_log", "eval_log_location"], json: str
+) -> Literal["handled", "not_handled", "no_subscribers"]:
     global _send_telemetry
     if _send_telemetry:
-        await _send_telemetry(type, json)
+        if await _send_telemetry(type, json):
+            return "handled"
+        return "not_handled"
+    return "no_subscribers"


 _send_telemetry: TelemetrySend | None = None
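Under the new contract a telemetry hook returns True to acknowledge an event, and send_telemetry reports "handled" / "not_handled" / "no_subscribers" back to its caller. A minimal sketch of a conforming hook, assuming a hypothetical module referenced via INSPECT_TELEMETRY (the module path and logging destination are illustrative, not part of the package):

# hypothetical module, e.g. INSPECT_TELEMETRY=mypkg.telemetry.send
async def send(type: str, json: str) -> bool:
    # acknowledge the lightweight event so inspect_ai can skip serializing the full eval log
    if type == "eval_log_location":
        print(f"eval log available at: {json}")
        return True
    # leave model_usage / eval_log unhandled
    return False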
inspect_ai/model/_chat_message.py
CHANGED
@@ -72,8 +72,8 @@ class ChatMessageUser(ChatMessageBase):
     role: Literal["user"] = Field(default="user")
     """Conversation role."""

-    tool_call_id: str | None = Field(default=None)
-    """ID of tool call this message has the content payload for."""
+    tool_call_id: list[str] | None = Field(default=None)
+    """ID(s) of tool call(s) this message has the content payload for."""


 class ChatMessageAssistant(ChatMessageBase):
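Since tool_call_id is now a list, a single user message can carry the content payload for several tool calls at once, which is what the fabricated user message introduced in _model.py below relies on. A small sketch; the ids are hypothetical:

from inspect_ai.model import ChatMessageUser

# user message standing in for the image content of two separate tool calls
msg = ChatMessageUser(
    content="Image content is included below.",
    tool_call_id=["call_abc", "call_def"],
)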
inspect_ai/model/_model.py
CHANGED
@@ -33,7 +33,6 @@ from inspect_ai._util.trace import trace_action
 from inspect_ai.tool import Tool, ToolChoice, ToolFunction, ToolInfo
 from inspect_ai.tool._tool_def import ToolDef, tool_defs
 from inspect_ai.util import concurrency
-from inspect_ai.util._limit import SampleLimitExceededError

 from ._cache import CacheEntry, CachePolicy, cache_fetch, cache_store
 from ._call_tools import disable_parallel_tools, tool_call_view, tools_info

@@ -764,40 +763,104 @@ def resolve_tool_model_input(
 def tool_result_images_as_user_message(
     messages: list[ChatMessage],
 ) -> list[ChatMessage]:
+    """
+    To conform to models lacking support for images in tool responses, create an alternate message history that moves images into a fabricated user message.
+
+    Tool responses will have images replaced with "Image content is included below.", and the new user message will contain the images.
+    """
+    init_accum: ImagesAccumulator = ([], [], [])
+    chat_messages, user_message_content, tool_call_ids = functools.reduce(
+        tool_result_images_reducer, messages, init_accum
+    )
+    # if the last message was a tool result, we may need to flush the pending stuff here
+    return maybe_adding_user_message(chat_messages, user_message_content, tool_call_ids)
+
+
+ImagesAccumulator = tuple[list[ChatMessage], list[Content], list[str]]
+"""
+ImagesAccumulator is a tuple containing three lists:
+- The first list contains ChatMessages that are the result of processing.
+- The second list contains ContentImages that need to be inserted into a fabricated user message.
+- The third list contains the tool_call_id's associated with the tool responses.
+"""


 def tool_result_images_reducer(
+    accum: ImagesAccumulator,
     message: ChatMessage,
-) ->
+) -> ImagesAccumulator:
+    messages, pending_content, tool_call_ids = accum
     # if there are tool result images, pull them out into a ChatUserMessage
-    if
+    if (
+        isinstance(message, ChatMessageTool)
+        and isinstance(message.content, list)
+        and any([isinstance(c, ContentImage) for c in message.content])
+    ):
+        init_accum: ImageContentAccumulator = ([], [])
+        new_user_message_content, edited_tool_message_content = functools.reduce(
+            tool_result_image_content_reducer, message.content, init_accum
         )
-            text="Image content is in the message below."
+
+        return (
+            messages
+            + [
+                ChatMessageTool(
+                    content=edited_tool_message_content,
+                    tool_call_id=message.tool_call_id,
+                    function=message.function,
                )
+            ],
+            pending_content + new_user_message_content,
+            tool_call_ids + ([message.tool_call_id] if message.tool_call_id else []),
+        )

     else:
+        return (
+            maybe_adding_user_message(messages, pending_content, tool_call_ids)
+            + [message],
+            [],
+            [],
+        )

+
+ImageContentAccumulator = tuple[list[Content], list[Content]]
+"""
+ImageContentAccumulator is a tuple containing two lists of Content objects:
+- The first list contains ContentImages that will be included in a fabricated user message.
+- The second list contains modified content for the tool message with images replaced with text.
+"""
+
+
+def tool_result_image_content_reducer(
+    acc: ImageContentAccumulator, content: Content
+) -> ImageContentAccumulator:
+    """
+    Reduces the messages Content into two separate lists: one for a fabricated user message that will contain the images and one for modified tool message with the images replaced with text.
+
+    Returns:
+      ImageContentReducer: A tuple containing two lists of Content objects.
+        - The first list contains the images that will be included in a fabricated user message.
+        - The second list contains modified content for the tool message with images replaced with text.
+    """
+    new_user_message_content, edited_tool_message_content = acc
+    if isinstance(content, ContentImage):
+        return new_user_message_content + [content], edited_tool_message_content + [
+            ContentText(text="Image content is included below.")
+        ]
+
+    else:
+        return new_user_message_content, edited_tool_message_content + [content]
+
+
+def maybe_adding_user_message(
+    messages: list[ChatMessage], content: list[Content], tool_call_ids: list[str]
+) -> list[ChatMessage]:
+    """If content is empty, return messages, otherwise, create a new ChatMessageUser with it and return a new messages list with that message added."""
+    return (
+        messages + [ChatMessageUser(content=content, tool_call_id=tool_call_ids)]
+        if content
+        else messages
+    )


 # Functions to reduce consecutive user messages to a single user message -> required for some models

@@ -884,6 +947,7 @@ def handle_sample_message_limit(input: str | list[ChatMessage]) -> None:
         active_sample_message_limit,
         set_active_sample_total_messages,
     )
+    from inspect_ai.solver._limit import SampleLimitExceededError

     total_messages = 1 if isinstance(input, str) else len(input)
     message_limit = active_sample_message_limit()

@@ -910,6 +974,7 @@ def record_model_usage(model: str, usage: ModelUsage) -> None:
         active_sample_token_limit,
         set_active_sample_total_tokens,
     )
+    from inspect_ai.solver._limit import SampleLimitExceededError

     # record usage
     set_model_usage(model, usage, sample_model_usage_context_var.get(None))
inspect_ai/model/_providers/anthropic.py
CHANGED
@@ -291,9 +291,6 @@ class AnthropicAPI(ModelAPI):
         elif "content filtering" in error:
             content = "Sorry, but I am unable to help with that request."
             stop_reason = "content_filter"
-        else:
-            content = error
-            stop_reason = "unknown"

         if content and stop_reason:
             return ModelOutput.from_content(
inspect_ai/solver/__init__.py
CHANGED
@@ -6,6 +6,7 @@ from ._chain import chain
 from ._critique import self_critique
 from ._fork import fork
 from ._human_agent.agent import human_agent
+from ._limit import SampleLimitExceededError
 from ._multiple_choice import MultipleChoiceTemplate, multiple_choice
 from ._plan import Plan, plan
 from ._prompt import chain_of_thought, prompt_template, system_message, user_message

@@ -37,6 +38,7 @@ __all__ = [
     "TaskState",
     "Generate",
     "MultipleChoiceTemplate",
+    "SampleLimitExceededError",
 ]
inspect_ai/solver/_basic_agent.py
CHANGED
@@ -13,6 +13,7 @@ from inspect_ai.solver._chain import chain
 from inspect_ai.tool._tool import Tool, ToolResult, tool
 from inspect_ai.tool._tool_with import tool_with

+from ._limit import SampleLimitExceededError
 from ._prompt import system_message
 from ._solver import Generate, Solver, solver
 from ._task_state import TaskState

@@ -167,61 +168,70 @@ def basic_agent(
        # track attempts
        attempts = 0

-        state.messages.append(state.output.message)
-
-        # check for context window overflow
-        if state.output.stop_reason == "model_length":
-            from inspect_ai.log._transcript import transcript
-
-            transcript().info("Agent terminated: model context window exceeded")
-            break
-
-        # resolve tools calls (if any)
-        if state.output.message.tool_calls:
-            # call tool functions
-            tool_results = await call_tools(
-                state.output.message, state.tools, max_output=max_tool_output
+        try:
+            # main loop (state.completed checks message_limit and token_limit)
+            while not state.completed:
+                # generate output and append assistant message
+                state.output = await get_model().generate(
+                    input=state.messages, tools=state.tools, cache=cache
                )
-            state.messages.
-            #
+                state.messages.append(state.output.message)
+
+                # check for context window overflow
+                if state.output.stop_reason == "model_length":
+                    from inspect_ai.log._transcript import transcript
+
+                    transcript().info(
+                        "Agent terminated: model context window exceeded"
+                    )
+                    break
+
+                # resolve tools calls (if any)
+                if state.output.message.tool_calls:
+                    # call tool functions
+                    tool_results = await call_tools(
+                        state.output.message,
+                        state.tools,
+                        max_output=max_tool_output,
+                    )
+                    state.messages.extend(tool_results)
+
+                    # was an answer submitted?
+                    answer = submission(tool_results)
+                    if answer:
+                        # set the output to the answer for scoring
+                        state.output.completion = answer
+
+                        # exit if we are at max_attempts
+                        attempts += 1
+                        if attempts >= max_attempts:
+                            state.completed = True
+                            break
+
+                        # exit if the submission is successful
+                        answer_scores = await score(state)
+                        if score_value_fn(answer_scores[0].value) == 1.0:
+                            state.completed = True
+                            break
+
+                        # otherwise notify the model that it was incorrect and continue
+                        else:
+                            response_message = (
+                                incorrect_message(state, answer_scores)
+                                if callable(incorrect_message)
+                                else incorrect_message
+                            )
+                            state.messages.append(
+                                ChatMessageUser(content=response_message)
+                            )
+
+                # no tool calls, urge the model to continue
+                else:
+                    state.messages.append(ChatMessageUser(content=continue_message))
+
+        # propagate current state along with sample limit exceeded
+        except SampleLimitExceededError as ex:
+            raise ex.with_state(state)

        return state
inspect_ai/{util → solver}/_limit.py
RENAMED
@@ -1,5 +1,7 @@
 from typing import Literal

+from ._task_state import TaskState
+

 class SampleLimitExceededError(Exception):
     """Exception raised when a sample limit is exceeded.

@@ -18,9 +20,20 @@ class SampleLimitExceededError(Exception):
         value: int,
         limit: int,
         message: str | None = None,
+        state: TaskState | None = None,
     ) -> None:
         self.type = type
         self.value = value
         self.limit = limit
         self.message = f"Exceeded {type} limit: {limit:,}"
+        self.state = state
         super().__init__(message)
+
+    def with_state(self, state: TaskState) -> "SampleLimitExceededError":
+        return SampleLimitExceededError(
+            self.type,
+            value=self.value,
+            limit=self.limit,
+            message=self.message,
+            state=state,
+        )
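SampleLimitExceededError now travels with the TaskState captured when the limit tripped, and with_state() lets intermediate handlers attach a fresher state before re-raising (which is how task/run.py recovers ex.state for scoring). A hedged sketch of a custom solver following the same pattern the diff applies in basic_agent; the solver name is illustrative:

from inspect_ai.solver import Generate, SampleLimitExceededError, Solver, TaskState, solver


@solver
def limit_tolerant_loop() -> Solver:
    async def solve(state: TaskState, generate: Generate) -> TaskState:
        try:
            # reading state.completed now also enforces message_limit / token_limit
            while not state.completed:
                state = await generate(state)
        except SampleLimitExceededError as ex:
            # re-raise with the latest state attached so the runner can still score it
            raise ex.with_state(state)
        return state

    return solve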
inspect_ai/solver/_task_state.py
CHANGED
@@ -22,7 +22,6 @@ from inspect_ai.scorer._metric import Score
 from inspect_ai.scorer._target import Target
 from inspect_ai.tool import Tool, ToolChoice
 from inspect_ai.tool._tool_def import ToolDef
-from inspect_ai.util._limit import SampleLimitExceededError
 from inspect_ai.util._store import Store, store_jsonable
 from inspect_ai.util._store_model import SMT

@@ -173,7 +172,7 @@ class TaskState:
         self.metadata = metadata
         """Metadata from the `Sample` for this `TaskState`"""

-        self._messages: list[ChatMessage] = ChatMessageList(messages)
+        self._messages: list[ChatMessage] = ChatMessageList(messages, self)
         """
         Chat conversation history for sample.

@@ -272,7 +271,7 @@ class TaskState:
     @messages.setter
     def messages(self, messages: list[ChatMessage]) -> None:
         """Set messages in chat history."""
-        self._messages = ChatMessageList(messages)
+        self._messages = ChatMessageList(messages, self)

     @property
     def max_messages(self) -> int | None:

@@ -319,8 +318,32 @@

     @property
     def completed(self) -> bool:
-        """Is the task completed.
-
+        """Is the task completed.
+
+        Additionally, checks message and token limits and raises if they are exceeded.
+        """
+        from inspect_ai.log._samples import set_active_sample_total_messages
+
+        from ._limit import SampleLimitExceededError
+
+        # update messages
+        set_active_sample_total_messages(len(self.messages))
+
+        if self._completed:
+            return True
+        elif self.message_limit and len(self.messages) >= self.message_limit:
+            raise SampleLimitExceededError(
+                "message",
+                value=len(self.messages),
+                limit=self.message_limit,
+                state=self,
+            )
+        elif self.token_limit and self.token_usage >= self.token_limit:
+            raise SampleLimitExceededError(
+                "token", value=self.token_usage, limit=self.token_limit, state=self
+            )
+        else:
+            return self._completed

     @completed.setter
     def completed(self, completed: bool) -> None:

@@ -403,7 +426,8 @@ def sample_jsonable(sample: Sample) -> dict[str, Any]:


 class ChatMessageList(list[ChatMessage]):
-    def __init__(self, iterable: Iterable[ChatMessage]):
+    def __init__(self, iterable: Iterable[ChatMessage], parent_state: TaskState):
+        self.parent_state = parent_state
         items, length = self._iterable_length(iterable)
         self._check_size(length)
         super().__init__(items)

@@ -411,12 +435,18 @@ class ChatMessageList(list[ChatMessage]):
     def _check_size(self, additional_items: int = 1) -> None:
         from inspect_ai.log._samples import active_sample_message_limit

+        from ._limit import SampleLimitExceededError
+
         messages_limit = active_sample_message_limit()
         if messages_limit is not None:
             messages = len(self) + additional_items
             if messages > messages_limit:
                 raise SampleLimitExceededError(
-                    "message",
+                    "message",
+                    value=messages,
+                    limit=messages_limit,
+                    message=None,
+                    state=self.parent_state,
                 )

     def append(self, item: ChatMessage) -> None:
inspect_ai/tool/_tools/_web_browser/_web_browser.py
CHANGED
@@ -345,7 +345,9 @@ async def web_browser_cmd(cmd: str, *args: str) -> str:
     if sandbox_env:
         store = store_as(WebBrowserStore)
         if not store.session_id:
-            result = await sandbox_env.exec(
+            result = await sandbox_env.exec(
+                ["python3", WEB_CLIENT_NEW_SESSION], timeout=180
+            )

             if not result.success:
                 raise RuntimeError(
inspect_ai/tool/beta/_computer/_resources/Dockerfile
CHANGED
@@ -33,8 +33,6 @@ RUN apt-get update && \

 # Userland apt-get'able apps
 RUN apt-get install -y --no-install-recommends \
-    # A simple image viewer.
-    xpaint \
     # A calculator application.
     galculator && \
     apt-get clean

@@ -78,7 +76,7 @@ RUN useradd -m -s /bin/bash -d $HOME $USERNAME
 RUN echo "${USERNAME} ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
 USER ${USERNAME}
 WORKDIR $HOME
-
+ADD --chown=$USERNAME:$USERNAME image_home_dir/ $HOME

 # configure Firefox to skip all 'first run' UI
 RUN mkdir -p $HOME/.mozilla/firefox-esr/profile.default && \
inspect_ai/tool/beta/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml
ADDED
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<channel name="xfce4-screensaver" version="1.0">
+  <property name="saver" type="empty">
+    <property name="mode" type="int" value="0" />
+  </property>
+  <property name="lock" type="empty">
+    <property name="enabled" type="bool" value="false" />
+  </property>
+</channel>
inspect_ai/util/__init__.py
CHANGED
@@ -3,7 +3,6 @@ from inspect_ai._util.trace import trace_action, trace_message
 from ._concurrency import concurrency
 from ._console import input_screen
 from ._display import DisplayType, display_type
-from ._limit import SampleLimitExceededError
 from ._panel import InputPanel, input_panel
 from ._resource import resource
 from ._sandbox import (

@@ -37,7 +36,6 @@ __all__ = [
     "input_panel",
     "input_screen",
     "OutputLimitExceededError",
-    "SampleLimitExceededError",
     "resource",
     "subprocess",
     "SandboxEnvironment",
inspect_ai/util/_sandbox/self_check.py
CHANGED
@@ -32,6 +32,7 @@ async def self_check(sandbox_env: SandboxEnvironment) -> dict[str, bool | str]:
     for fn in [
         test_read_and_write_file_text,
         test_read_and_write_file_binary,
+        test_write_file_text_utf,
         test_read_and_write_file_including_directory_absolute,
         test_read_and_write_file_including_directory_relative,
         test_read_file_zero_length,

@@ -64,33 +65,39 @@ async def self_check(sandbox_env: SandboxEnvironment) -> dict[str, bool | str]:


 async def _cleanup_file(sandbox_env: SandboxEnvironment, filename: str) -> None:
-    res = await sandbox_env.exec(["rm", filename])
+    res = await sandbox_env.exec(["rm", "-f", "--", filename])
     assert res.success


 async def test_read_and_write_file_text(sandbox_env: SandboxEnvironment) -> None:
-    )
-    written_file_string = await sandbox_env.read_file(
-        "test_read_and_write_file_text.file", text=True
-    )
+    file_name = "test_read_and_write_file_text.file"
+    await sandbox_env.write_file(file_name, "great #content\nincluding newlines")
+    written_file_string = await sandbox_env.read_file(file_name, text=True)
     assert "great #content\nincluding newlines" == written_file_string, (
         f"unexpected content: [{written_file_string}]"
     )
-    await _cleanup_file(sandbox_env,
+    await _cleanup_file(sandbox_env, file_name)
+
+
+async def test_write_file_text_utf(sandbox_env: SandboxEnvironment) -> None:
+    utf_content = "✨☽︎✨🌞︎︎✨🚀✨"
+    file_name = "test_write_file_text_utf.file"
+    await sandbox_env.write_file(file_name, utf_content)
+    file_with_utf_content = await sandbox_env.read_file(file_name, text=True)
+    assert isinstance(file_with_utf_content, str)
+    assert file_with_utf_content == utf_content
+    await _cleanup_file(sandbox_env, file_name)


 async def test_read_and_write_file_binary(sandbox_env: SandboxEnvironment) -> None:
+    file_name = "test_read_and_write_file_binary.file"
     await sandbox_env.write_file(
-
+        file_name, b"\xc3\x28"
     )  # invalid UTF-8 from https://stackoverflow.com/a/17199164/116509

-    written_file_bytes = await sandbox_env.read_file(
-        "test_read_and_write_file_binary.file", text=False
-    )
+    written_file_bytes = await sandbox_env.read_file(file_name, text=False)
     assert b"\xc3\x28" == written_file_bytes
-    await _cleanup_file(sandbox_env,
+    await _cleanup_file(sandbox_env, file_name)


 async def test_read_and_write_file_including_directory_absolute(

@@ -101,6 +108,7 @@ async def test_read_and_write_file_including_directory_absolute(
     written_file_string = await sandbox_env.read_file(file_name, text=True)
     assert "absolutely enjoying being in a directory" == written_file_string
     await _cleanup_file(sandbox_env, file_name)
+    await sandbox_env.exec(["rmdir", "/tmp/test_rw_including_directory_absolute"])


 async def test_read_and_write_file_including_directory_relative(

@@ -111,20 +119,23 @@ async def test_read_and_write_file_including_directory_relative(
     written_file_string = await sandbox_env.read_file(file_name, text=True)
     assert "relatively enjoying being in a directory" == written_file_string
     await _cleanup_file(sandbox_env, file_name)
+    await sandbox_env.exec(["rmdir", "test_rw_including_directory_relative"])


 async def test_read_file_zero_length(sandbox_env: SandboxEnvironment) -> None:
+    file_name = "zero_length_file.file"
+    await sandbox_env.exec(["touch", file_name])
+    zero_length = await sandbox_env.read_file(file_name, text=True)
     assert isinstance(zero_length, str)
     assert zero_length == ""
+    await _cleanup_file(sandbox_env, file_name)


 async def test_read_file_not_found(sandbox_env: SandboxEnvironment) -> None:
+    file_name = "nonexistent"
     with Raises(FileNotFoundError) as e_info:
-        await sandbox_env.read_file(
-    assert
+        await sandbox_env.read_file(file_name, text=True)
+    assert file_name in str(e_info.value)


 async def test_read_file_not_allowed(sandbox_env: SandboxEnvironment) -> None:

@@ -134,22 +145,23 @@ async def test_read_file_not_allowed(sandbox_env: SandboxEnvironment) -> None:
     with Raises(PermissionError) as e_info:
         await sandbox_env.read_file(file_name, text=True)
     assert file_name in str(e_info.value)
+    await sandbox_env.exec(["chmod", "+r", file_name])
     await _cleanup_file(sandbox_env, file_name)


 async def test_read_file_is_directory(sandbox_env: SandboxEnvironment) -> None:
+    file_name = "/etc"
     with Raises(IsADirectoryError) as e_info:
-        await sandbox_env.read_file(
+        await sandbox_env.read_file(file_name, text=True)
     assert "directory" in str(e_info.value)


 async def test_read_file_nonsense_name(
     sandbox_env: SandboxEnvironment,
 ) -> None:
+    file_name = "https:/en.wikipedia.org/wiki/Bart%C5%82omiej_Kasprzykowski"
     with Raises(FileNotFoundError) as e_info:
-        await sandbox_env.read_file(
+        await sandbox_env.read_file(file_name, text=True)
     assert "wikipedia" in str(e_info.value)


@@ -159,24 +171,28 @@ async def test_read_file_limit(sandbox_env: SandboxEnvironment) -> None:
     # Patch limit down to 1KiB for the test to save us from writing a 100 MiB file.
     with mock.patch.object(SandboxEnvironmentLimits, "MAX_READ_FILE_SIZE", 1024):
         with Raises(OutputLimitExceededError) as e_info:
-            await sandbox_env.read_file(
+            await sandbox_env.read_file(file_name, text=True)
     assert "limit of 100 MiB was exceeded" in str(e_info.value)
     await _cleanup_file(sandbox_env, file_name)


 async def test_write_file_zero_length(sandbox_env: SandboxEnvironment) -> None:
+    file_name = "zero_length_file.file"
+    await sandbox_env.write_file(file_name, "")
+    zero_length = await sandbox_env.read_file(file_name, text=True)
     assert isinstance(zero_length, str)
     assert zero_length == ""
+    await _cleanup_file(sandbox_env, file_name)


 async def test_write_file_space(sandbox_env: SandboxEnvironment) -> None:
-    space = "
+    space = "to the moon"
+    file_name = "file with space.file"
+    await sandbox_env.write_file(file_name, space)
+    file_with_space = await sandbox_env.read_file(file_name, text=True)
     assert isinstance(file_with_space, str)
     assert file_with_space == space
+    await _cleanup_file(sandbox_env, file_name)


 async def test_write_file_is_directory(

@@ -192,6 +208,9 @@ async def test_write_file_is_directory(
         "content cannot go in a directory, dummy",
     )
     assert "directory" in str(e_info.value)
+    await sandbox_env.exec(
+        ["rm", "-rf", "/tmp/inspect_ai_test_write_file_is_directory"]
+    )


 async def test_write_file_without_permissions(

@@ -203,6 +222,8 @@ async def test_write_file_without_permissions(
     with Raises(PermissionError) as e_info:
         await sandbox_env.write_file(file_name, "this won't stick")
     assert file_name in str(e_info.value)
+    await sandbox_env.exec(["chmod", "+w", file_name])
+    await _cleanup_file(sandbox_env, file_name)


 async def test_write_file_exists(

@@ -213,6 +234,7 @@ async def test_write_file_exists(
     await sandbox_env.write_file(file_name, "altered content")
     altered_content = await sandbox_env.read_file(file_name, text=True)
     assert altered_content == "altered content"
+    await _cleanup_file(sandbox_env, file_name)


 async def test_exec_output(sandbox_env: SandboxEnvironment) -> None:

@@ -305,6 +327,7 @@ async def test_cwd_absolute(sandbox_env: SandboxEnvironment) -> None:
     current_dir_contents = (await sandbox_env.exec(["ls"], cwd=cwd_directory)).stdout
     assert "test_cwd_absolute.file" in current_dir_contents
     await _cleanup_file(sandbox_env, file_name)
+    await sandbox_env.exec(["rmdir", cwd_directory])


 async def test_exec_stdout_is_limited(sandbox_env: SandboxEnvironment) -> None:
{inspect_ai-0.3.60.dist-info → inspect_ai-0.3.61.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: inspect_ai
-Version: 0.3.
+Version: 0.3.61
 Summary: Framework for large language model evaluations
 Author: UK AI Safety Institute
 License: MIT License

@@ -68,7 +68,7 @@ Requires-Dist: pytest-asyncio; extra == "dev"
 Requires-Dist: pytest-cov; extra == "dev"
 Requires-Dist: pytest-dotenv; extra == "dev"
 Requires-Dist: pytest-xdist; extra == "dev"
-Requires-Dist: ruff==0.9.
+Requires-Dist: ruff==0.9.4; extra == "dev"
 Requires-Dist: textual-dev>=0.86.2; extra == "dev"
 Requires-Dist: types-PyYAML; extra == "dev"
 Requires-Dist: types-beautifulsoup4; extra == "dev"
{inspect_ai-0.3.60.dist-info → inspect_ai-0.3.61.dist-info}/RECORD
CHANGED
@@ -3,7 +3,7 @@ inspect_ai/__main__.py,sha256=oWX4YwDZDg3GS3-IG0yPGoSEOfSzWihELg7QmrUlxjM,67
 inspect_ai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 inspect_ai/_cli/cache.py,sha256=nOX9ysB3rZ-V8b_ryTpkgpoAynNlq4Op_fIqAIl4EVg,3910
 inspect_ai/_cli/common.py,sha256=5smokbnQGpOe72WXlpDy1QWuWbjxILKnl5UPvgfW0Yk,3678
-inspect_ai/_cli/eval.py,sha256=
+inspect_ai/_cli/eval.py,sha256=LW2KH4iENl5QF10ItTwHzHM7v26RPS7-1XevaQjSa5E,31968
 inspect_ai/_cli/info.py,sha256=d5W7VA5buinGcsdQjWqlsMM6iSNNGRUHQrr4JS2k8nk,1749
 inspect_ai/_cli/list.py,sha256=GreVEhJRpagiCpzVc3FSGhcdpTq9B8Jh--mfgs4ueFQ,2454
 inspect_ai/_cli/log.py,sha256=boSzHZkiabhnYWHLRVsZVENCryG-MDaNHIIbpSp0Mcs,5729

@@ -55,11 +55,11 @@ inspect_ai/_eval/task/__init__.py,sha256=yhBZbjh67QfHy7IdyFGMyQIfBflQLPLkhmz5eEv
 inspect_ai/_eval/task/constants.py,sha256=quAKMw-4-3xKd1T_KwXCZvHYoKRXt1ZGuaHbBcWJwnA,72
 inspect_ai/_eval/task/epochs.py,sha256=Ci7T6CQniSOTChv5Im2dCdSDrP-5hq19rV6iJ2uBcH8,700
 inspect_ai/_eval/task/error.py,sha256=gJnd8X7LHpPz5zcOq_az6ONZICGJ0_VpSz9yhF0yRyY,1233
-inspect_ai/_eval/task/generate.py,sha256=
+inspect_ai/_eval/task/generate.py,sha256=lm066fbZOX7o3NB57rbwwec-ZaIFE745fiuacPCmo20,2352
 inspect_ai/_eval/task/images.py,sha256=Tg3I7d7ThCYP_Lf-H5JA7xH-sH2W-m1c1YfswDwplt4,3949
 inspect_ai/_eval/task/log.py,sha256=TjyLglP-3gMMDkfmxYbsxTvGIWY9FEyVtB09Fyrm_PA,6166
 inspect_ai/_eval/task/results.py,sha256=GkWlgHexm0BPyxrmqLY6YSXz3AqXYMVLXNnKCYfW7ic,15785
-inspect_ai/_eval/task/run.py,sha256=
+inspect_ai/_eval/task/run.py,sha256=FdTysq-2dMFEXWIs7m8ONbXXDabx9V2saJecBurlQKs,34894
 inspect_ai/_eval/task/rundir.py,sha256=QXetLfqi1lRo-PcIuu7maQpVO57c2ebnsjfZk0lsAFk,2001
 inspect_ai/_eval/task/sandbox.py,sha256=kwG1SQawZFDle3hzqMe4hSdnuvShkKsmMIrcC2gnYHM,6120
 inspect_ai/_eval/task/task.py,sha256=ahlM-0MJc_4wFCWTGQIsnapUn0epka_9jRVK-xdapHY,13800

@@ -83,7 +83,7 @@ inspect_ai/_util/file.py,sha256=49NXD2R_qVDMScBfifiHKhQ6ypB4OyX6cA3ym1k0-1U,1226
 inspect_ai/_util/format.py,sha256=RWmK4JcB7NwRy4rXtUa1JJ52_KhxcvREhMMCFVHvzwQ,1179
 inspect_ai/_util/git.py,sha256=nHCtZMLjMyFjSC_9bksBXeFz4xqxZfY6lfXr_qg2n1E,760
 inspect_ai/_util/hash.py,sha256=N25e4B_Lp9ZFSQIrtMO4x-KrZopJL6gKhs-NO41pxzA,289
-inspect_ai/_util/hooks.py,sha256=
+inspect_ai/_util/hooks.py,sha256=8QnHCQQY_2XMYPkiPvixUgFY0E_niZvQhQDMI-eCdhM,4353
 inspect_ai/_util/html.py,sha256=X62FY8gpEJ2ZQoDu2y8aQAbiBUIHKsd7DA9rWCIleo8,168
 inspect_ai/_util/http.py,sha256=c4yvH48ZkETZ7sNDuNzBR0NUS4r-6WzCaolW9my13ns,3628
 inspect_ai/_util/images.py,sha256=W7QJHyzuXhfy3VsLhKTzddSo1g69O9RxnTyhat48Wyo,1312

@@ -127,7 +127,7 @@ inspect_ai/_view/www/favicon.svg,sha256=b9AHYZaO2zBzeKH6G4PwXZMGGW_UxY0omKHam-c9
 inspect_ai/_view/www/index.html,sha256=LDaPH75d5TlepHfIY3wQG0aBcHTpa90Bhx0er_ES45s,910
 inspect_ai/_view/www/jsconfig.json,sha256=vt1gPPYezOFeV9nofA93CmVJAKGb1QeKGuyvEn1CXgk,383
 inspect_ai/_view/www/log-schema.json,sha256=4PRhm3dJvhwPhEZCDUFMwzHcIsPNLGGPuuqXHIwSUxk,95006
-inspect_ai/_view/www/package.json,sha256=
+inspect_ai/_view/www/package.json,sha256=y2cHvK7QKQcVk2v66ldn-syN649xnAjTVHju4QFJY2s,1189
 inspect_ai/_view/www/postcss.config.cjs,sha256=jQ-QIJFuBVUTZXbFpOvUJk4MsLBr_yPOQbRwHD0ZohE,340
 inspect_ai/_view/www/tsconfig.json,sha256=JjaLdpt13sgJYHDWdThRIr0gHzpRzEOKL4E2awt9r9s,277
 inspect_ai/_view/www/vite.config.js,sha256=jmSUrV0YzGCcinfyKcmy2bDRUE10mmPUxMAen0bX8jY,1103

@@ -304,17 +304,17 @@ inspect_ai/log/_recorders/recorder.py,sha256=yvW_D99QxUQmnF5EiGsWIVetBXdssMUcsq5
 inspect_ai/model/__init__.py,sha256=gYBnBjfEEG_GQhu_lhwQ2tW9U4nXLW0GtRJNGfwYy3k,2121
 inspect_ai/model/_cache.py,sha256=IQXhMas58Pjdma894GHGTtHYpmnf_Ojz_eE0kHaQVPs,13567
 inspect_ai/model/_call_tools.py,sha256=Vy3uXVpHY9b0gQrXu3KGmvEOWXntSCxbpJ0elTAQ0So,18128
-inspect_ai/model/_chat_message.py,sha256=
+inspect_ai/model/_chat_message.py,sha256=rJ1_pBn0sQdsr4R_DQUrg2PvRzi4VaYc2N_ttikuFy0,4454
 inspect_ai/model/_conversation.py,sha256=SFumVE67sq-mgSfqaZw2xwE8kow5NxF6FU8VbXsvc8k,1988
 inspect_ai/model/_generate_config.py,sha256=WjlFH6WtfyIpF6TMcSFmIUxyyB0D4quZLIqMd82oEW8,8757
 inspect_ai/model/_image.py,sha256=kpO2Bn_-c-dK80HuPOPH1eSNmcoc39kofwf4yTTiTFE,477
-inspect_ai/model/_model.py,sha256=
+inspect_ai/model/_model.py,sha256=9rLk3F33By0Gc8d7_LdS9r4sTth11QxC5tGu3m_4suo,37020
 inspect_ai/model/_model_call.py,sha256=r6ObHZwm7jy1qX2qxvAP6iPV3BhdGThL-VH-QEawQhA,2017
 inspect_ai/model/_model_output.py,sha256=X6dEH3L4XPs5H8cWQeVF8tlkDMNRqP3StJlPA_z140E,7029
 inspect_ai/model/_openai.py,sha256=XhYu_Rdc5jLGkrgdIkbniNWlQVBx9iYj2DdDTK1U12o,12871
 inspect_ai/model/_registry.py,sha256=Cr2y32EqLnOqLbSWoXHVK4ivTTzCUhJuACxoTyPt8kY,2032
 inspect_ai/model/_render.py,sha256=rWypNUjgrH4NGp0r-ESAze9gZz7lYNjheEP438vRYZE,922
-inspect_ai/model/_providers/anthropic.py,sha256=
+inspect_ai/model/_providers/anthropic.py,sha256=4a07C0PGxWyyHGZuKol6EXULsDC7lphqyJgSdaOJ1gY,24674
 inspect_ai/model/_providers/azureai.py,sha256=moIC4-um_Qs3iXbr4DlP6LUL924aF-s5YyQqF0V5ye4,14037
 inspect_ai/model/_providers/bedrock.py,sha256=BiSEQYlGLKqaadGUJxSQuule3JPLZbAIjfhJ36DYQ8k,23357
 inspect_ai/model/_providers/cloudflare.py,sha256=h6ubjf0kxyMM7Aj2tm68tWa-2R7RAXNGp1O6KMvi0Gw,4143

@@ -359,16 +359,17 @@ inspect_ai/scorer/_reducer/__init__.py,sha256=ntoSXbbBia6gN3Uk3tQFQ8lSt8IBSRvwM5
 inspect_ai/scorer/_reducer/reducer.py,sha256=g8F7sTm_FmPcLdavOGv0YuvqZ5_nz2irmQVq37h2Y5A,11494
 inspect_ai/scorer/_reducer/registry.py,sha256=J2tvuuxf4jBC09_SCBZg99Qb2qQUWG8STEsw7ASWpXQ,5388
 inspect_ai/scorer/_reducer/types.py,sha256=uimvzIBRK7x1Dof77gsHYe9PU3hekB1opm9DTAa4sL4,340
-inspect_ai/solver/__init__.py,sha256=
-inspect_ai/solver/_basic_agent.py,sha256=
+inspect_ai/solver/__init__.py,sha256=I8lmfnohTUYyW3aR7sCQhkOBh9R75a0-QrYkR3hG76E,3433
+inspect_ai/solver/_basic_agent.py,sha256=DaPMu_2SuoBamYwd54soxGaW2lcK21Siuf0qYW9Ps9w,10134
 inspect_ai/solver/_chain.py,sha256=F-2ZHE2KOlDAIgH1_Q23whUMH5TjYGvCHhcOgbRxe7I,2234
 inspect_ai/solver/_critique.py,sha256=ddO8J7VqSEsT6kofawpn3PrcUpLjLUMFmJi0hocDZpI,3504
 inspect_ai/solver/_fork.py,sha256=Ge1PwpCHjeZhm2CLAWKss2uFuQd9BGzVinLOW6UOnfE,2880
+inspect_ai/solver/_limit.py,sha256=zaZseJgjbJaBnGdXQHQ5MpU4tzgUyD8FzLvJMGDk3jA,1122
 inspect_ai/solver/_multiple_choice.py,sha256=tSLrwqAHuvX_eccM6OXiRmlx5bx_3g1LcB8GDWWV9C0,11024
 inspect_ai/solver/_plan.py,sha256=Dp1DDTtGe2iTo8CYWKqCOdfBFfTK_0wi2JzIr6qrikI,7042
 inspect_ai/solver/_prompt.py,sha256=PwGtLQ-dnCzxN_74H1NDT7LAhUuuiw2-c6ZSyXgBFgQ,3953
 inspect_ai/solver/_solver.py,sha256=Q-FrkfD97_TufEzuQxzr_LgziCdQipIuy778NWq7vVM,9008
-inspect_ai/solver/_task_state.py,sha256=
+inspect_ai/solver/_task_state.py,sha256=Timv9_961yPNjh07BBUL0QeHeLLKx6b-QBsN1ocnEvY,16237
 inspect_ai/solver/_transcript.py,sha256=gkH9CC5gYbz7ZzrFD0TkjtKYjWxQP5EthJOkq8NXDOc,1049
 inspect_ai/solver/_use_tools.py,sha256=W7muO8r9eThXydm1GjFF-f6gip9AhzhgAG2GHSE5EpM,2011
 inspect_ai/solver/_util.py,sha256=pthrf-CzC6FnQYSUFLXTYM4wFEJptZrh5POTmV-Jtow,446

@@ -402,7 +403,7 @@ inspect_ai/tool/_tool_with.py,sha256=YBHjhT9PuM2QyUxD_BzhgqFPFfUPoRrTIpXMBXMXlFY
 inspect_ai/tool/_tools/_execute.py,sha256=DkFlvUTvI595H1zH5IKArhbyBo8YZWqq9tvoUMdvlaw,2823
 inspect_ai/tool/_tools/_web_search.py,sha256=YqZ3E65ssdq1X2NSH9Mqt5INXdPVQOdKa3PbKi7XjAY,7828
 inspect_ai/tool/_tools/_web_browser/__init__.py,sha256=dnnzy96pcvMvxD1OGg4hG-doL7Ru7WH0i25Sb9VIXwE,65
-inspect_ai/tool/_tools/_web_browser/_web_browser.py,sha256=
+inspect_ai/tool/_tools/_web_browser/_web_browser.py,sha256=Bzb5RtxlQuxGgbkhxyaUTaj6VdNClgePBl5IuX2JJhY,15584
 inspect_ai/tool/_tools/_web_browser/_resources/Dockerfile,sha256=Sh1Ht5oBuxZC_8hLzw877CIvM9me_8Q0MxMemR5E_js,431
 inspect_ai/tool/_tools/_web_browser/_resources/README.md,sha256=RAMe6uFUYepkPSqpdCuag0nqASuFEONDI7jOHagYfuI,2607
 inspect_ai/tool/_tools/_web_browser/_resources/accessibility_node.py,sha256=PuOOeF5rDjN9tz-kRQ_UZUXj7MzrjwuFEdhVIYcCcQw,9628

@@ -423,16 +424,16 @@ inspect_ai/tool/beta/_computer/__init__.py,sha256=fq4BSM4aDhtEtE4279xm47NiO6vyiZ
 inspect_ai/tool/beta/_computer/_common.py,sha256=6XK6MBu2ZiRCao_eMlZdjXEvTmbeKQRN0K-8MtBPsk4,4059
 inspect_ai/tool/beta/_computer/_computer.py,sha256=2R-3GLoSvQn8b0rVPur3jMzaRK4nS6i_sDwzicj5XJ8,7433
 inspect_ai/tool/beta/_computer/_computer_split.py,sha256=H3DVCJqpHp_2ra85W_z9s5r-oHTVWwctuEq5fDdy2T4,5588
-inspect_ai/tool/beta/_computer/_resources/Dockerfile,sha256=
+inspect_ai/tool/beta/_computer/_resources/Dockerfile,sha256=aLdS_UK2-bXiDQcfhGjVytPUQR1lb_WAQN8x4Ssa7_I,3498
 inspect_ai/tool/beta/_computer/_resources/README.md,sha256=5JDNaGJ-Ebq6Io57ANFIqgjPoh11aGDSrrgrhyfiqxU,1657
 inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh,sha256=IR8sE-b22YO7lwzdDiyjhLTJWIf0X__wA8WE98dwkwM,394
 inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh,sha256=PAbMgSvprnLvbj8A8a59o_yDfm-jiCXxBxsPb004Bf8,383
-inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh,sha256=
+inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh,sha256=JFcW46u2ioDpGLptmUOMaqtt2YvuFhCTB42cyWRmo8c,993
 inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh,sha256=w_27I4o7usP8SUMzP3lrXeNuISslyy1aywkgpm_2l4Q,209
 inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh,sha256=hd2naWFFpm3S0cWZ6Lhlpm6KD3L6-g8Zw2dgxchXMUg,1118
+inspect_ai/tool/beta/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml,sha256=jNgaNqBCngQlykTlLhmmdc_LLOrH2AMk7pUpLkbCjMY,312
 inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop,sha256=Odm77RSEiTlMx7cY8odUCO2a8fvIUwHcpEUanpHzbL0,181
 inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop,sha256=jYYu8pcdIhFCC_3cEgO-0z0A6eQO2WQkIVViebSBbpA,184
-inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop,sha256=T093gZ3B2aXNd0yo6J31rJ0HeE3ROXPCbgAWxZqtjDA,158
 inspect_ai/tool/beta/_computer/_resources/tool/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 inspect_ai/tool/beta/_computer/_resources/tool/_logger.py,sha256=owkNYe9lyShTLXoMqhK4Qtzacnt5FvSxN8Wqf2MO5XM,652
 inspect_ai/tool/beta/_computer/_resources/tool/_run.py,sha256=xhXdnBK1di9muaO44CEirL9hpGy3NmKbjfMpyeVmn8Y,1595

@@ -440,12 +441,11 @@ inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py,sha256=cd6JNFhwyI
 inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py,sha256=rLduqd6JduoM9nMQk2-q7lpK4TCasz2F6_6mexquInI,9566
 inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py,sha256=0ehJuuUO6AlWUZKt3TyUbWQuwyBmkpsBbHxizZI_0GQ,2574
 inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-inspect_ai/util/__init__.py,sha256=
+inspect_ai/util/__init__.py,sha256=Nbr5h9EDqnUFqj1SSm5hJccHp_sz2YB1SCZgFS0NYDk,1388
 inspect_ai/util/_concurrency.py,sha256=Olzk259NPeSXIy5LzID_WEVGnaW6p5CBCBrmlZUYufM,2227
 inspect_ai/util/_console.py,sha256=V1XkIoKcNZo0SgRUOv15zJAWz6-zV6267hC4Oldj8oY,1237
 inspect_ai/util/_conversation.py,sha256=KzqvKfj1tB14cgARZjYyIVG2EpuE-EZKqLGAPIXv1Xs,784
 inspect_ai/util/_display.py,sha256=IUVyzS0PtFo9LeRW_EWvDv7tkGy1rsZGBjqg63uOPOs,1591
-inspect_ai/util/_limit.py,sha256=HMgembPprMvJFeFQy82Gw_BkPX4mqYBP1mGu-aA0n5g,751
 inspect_ai/util/_panel.py,sha256=bSXXV1LDVMt8DeGWEYTfEm3iMz9I02sX5xpBSVUVRdQ,3072
 inspect_ai/util/_resource.py,sha256=MMmtTKtt78pDIp9Uc_OxJom_q8mcKozVqt8kosKRJt0,3420
 inspect_ai/util/_store.py,sha256=QemJe2M-RK6zSFNcd07_92XFjvNtWKgHzBr5eT3KF1I,3786

@@ -459,7 +459,7 @@ inspect_ai/util/_sandbox/environment.py,sha256=Qo7ne28L6fn3igo2Gd0H1lz4vP60IdJGS
 inspect_ai/util/_sandbox/limits.py,sha256=K-GjKfSugOq8KP0wW_oF6qFrXsOnMV0C88QUWkjPJ9o,2164
 inspect_ai/util/_sandbox/local.py,sha256=NkHnR_e7s7RFsBdwfaSR7Yzp6lSUc7Em0Pc9_CFuN4c,3534
 inspect_ai/util/_sandbox/registry.py,sha256=mQwWwqzaCXF1FZ2fcVujpp3WMA35GWnh1w43SoIJAVM,2145
-inspect_ai/util/_sandbox/self_check.py,sha256=
+inspect_ai/util/_sandbox/self_check.py,sha256=ZRb2wMRnyiUBJPJqTfLlUO2_ctxBoJ-4soyERfn583A,15222
 inspect_ai/util/_sandbox/service.py,sha256=2os7W8NYBDcaBoaHVfZ1YrI9hvldksmiwqkUYrCRCPo,11258
 inspect_ai/util/_sandbox/docker/cleanup.py,sha256=MK6UlADcWtTDotppeVJga2ibf9Ud-e4V-5ReoNbmhqg,4793
 inspect_ai/util/_sandbox/docker/compose.py,sha256=4aIWWTaTUY9ZWrfSynkRqrUbKlYWrRYoSDX9WrjdHFQ,11473

@@ -468,9 +468,9 @@ inspect_ai/util/_sandbox/docker/docker.py,sha256=sx4PNv_4PDuKlkeYV6ASaZbo0XT-I_V
 inspect_ai/util/_sandbox/docker/internal.py,sha256=fATyk2pdtjSl-D0VPT4dmkXV-gOc5HrPH0EQDW4IAJY,1446
 inspect_ai/util/_sandbox/docker/prereqs.py,sha256=0j6_OauBBnVlpBleADcZavIAAQZy4WewVjbRn9c0stg,3355
 inspect_ai/util/_sandbox/docker/util.py,sha256=pSPsRGymrTmTnEUHiHoQSNqeurPP1mL5kB-105O6EWo,2794
-inspect_ai-0.3.
-inspect_ai-0.3.
-inspect_ai-0.3.
-inspect_ai-0.3.
-inspect_ai-0.3.
-inspect_ai-0.3.
+inspect_ai-0.3.61.dist-info/LICENSE,sha256=aYPffOl9TwBXDQ8g33Jh6AsBhobb3A76qNm7r2HZsps,1079
+inspect_ai-0.3.61.dist-info/METADATA,sha256=8fUvCJ8CAPh0ANPl4OPuHGBmbhbewMoH6F1AA94SRTA,4528
+inspect_ai-0.3.61.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+inspect_ai-0.3.61.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
+inspect_ai-0.3.61.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
+inspect_ai-0.3.61.dist-info/RECORD,,
{inspect_ai-0.3.60.dist-info → inspect_ai-0.3.61.dist-info}/LICENSE
File without changes
{inspect_ai-0.3.60.dist-info → inspect_ai-0.3.61.dist-info}/WHEEL
File without changes
{inspect_ai-0.3.60.dist-info → inspect_ai-0.3.61.dist-info}/entry_points.txt
File without changes
{inspect_ai-0.3.60.dist-info → inspect_ai-0.3.61.dist-info}/top_level.txt
File without changes