inspect-ai 0.3.60__py3-none-any.whl → 0.3.61__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
inspect_ai/_cli/eval.py CHANGED
@@ -592,7 +592,6 @@ def eval_set_command(
592
592
  logit_bias: str | None,
593
593
  seed: int | None,
594
594
  stop_seqs: str | None,
595
- suffix: str | None,
596
595
  temperature: float | None,
597
596
  top_p: float | None,
598
597
  top_k: int | None,
@@ -8,6 +8,7 @@ from inspect_ai.model import (
8
8
  )
9
9
  from inspect_ai.model._cache import epoch
10
10
  from inspect_ai.solver import TaskState
11
+ from inspect_ai.solver._limit import SampleLimitExceededError
11
12
  from inspect_ai.tool import ToolFunction
12
13
 
13
14
 
@@ -21,45 +22,50 @@ async def task_generate(
21
22
  # track tool_choice (revert to "auto" after first forced call of a tool)
22
23
  tool_choice = state.tool_choice
23
24
 
24
- while True:
25
- # If we don't update the epoch here as we go, it's entirely possible
26
- # we'd cache the same response for every single epoch, which would
27
- # completely defeat the point!
28
- epoch.set(state.epoch)
25
+ try:
26
+ while True:
27
+ # If we don't update the epoch here as we go, it's entirely possible
28
+ # we'd cache the same response for every single epoch, which would
29
+ # completely defeat the point!
30
+ epoch.set(state.epoch)
29
31
 
30
- # call the model
31
- state.output = await model.generate(
32
- input=state.messages,
33
- tools=state.tools,
34
- tool_choice=tool_choice,
35
- config=config,
36
- cache=cache,
37
- )
32
+ # call the model
33
+ state.output = await model.generate(
34
+ input=state.messages,
35
+ tools=state.tools,
36
+ tool_choice=tool_choice,
37
+ config=config,
38
+ cache=cache,
39
+ )
38
40
 
39
- # append the assistant message
40
- message = state.output.message
41
- state.messages.append(message)
41
+ # append the assistant message
42
+ message = state.output.message
43
+ state.messages.append(message)
42
44
 
43
- # check for completed
44
- if state.completed:
45
- return state
45
+ # check for completed
46
+ if state.completed:
47
+ return state
46
48
 
47
- # resolve tool calls if necessary
48
- if tool_calls != "none" and message.tool_calls:
49
- # call tools and append messages to state
50
- state.messages.extend(
51
- await call_tools(message, state.tools, config.max_tool_output)
52
- )
49
+ # resolve tool calls if necessary
50
+ if tool_calls != "none" and message.tool_calls:
51
+ # call tools and append messages to state
52
+ state.messages.extend(
53
+ await call_tools(message, state.tools, config.max_tool_output)
54
+ )
53
55
 
54
- # check for completed or only executing a single tool call
55
- if state.completed or tool_calls == "single":
56
- return state
56
+ # check for completed or only executing a single tool call
57
+ if state.completed or tool_calls == "single":
58
+ return state
59
+
60
+ # if a tool_call was forced set tool_choice to 'auto'
61
+ # (otherwise it will get forced over and over again)
62
+ if isinstance(tool_choice, ToolFunction):
63
+ tool_choice = "auto"
57
64
 
58
- # if a tool_call was forced set tool_choice to 'auto'
59
- # (otherwise it will get forced over and over again)
60
- if isinstance(tool_choice, ToolFunction):
61
- tool_choice = "auto"
65
+ # no tool calls or not resolving tool calls, we are done!
66
+ else:
67
+ return state
62
68
 
63
- # no tool calls or not resolving tool calls, we are done!
64
- else:
65
- return state
69
+ # propagate current state along with sample limit exceeded
70
+ except SampleLimitExceededError as ex:
71
+ raise ex.with_state(state)
@@ -75,9 +75,9 @@ from inspect_ai.scorer._scorer import unique_scorer_name
75
75
  from inspect_ai.solver import Generate, Plan, TaskState
76
76
  from inspect_ai.solver._chain import Chain, unroll
77
77
  from inspect_ai.solver._fork import set_task_generate
78
+ from inspect_ai.solver._limit import SampleLimitExceededError
78
79
  from inspect_ai.solver._solver import Solver
79
80
  from inspect_ai.solver._task_state import sample_state, set_sample_state, state_jsonable
80
- from inspect_ai.util._limit import SampleLimitExceededError
81
81
  from inspect_ai.util._sandbox.context import sandbox_connections
82
82
  from inspect_ai.util._sandbox.environment import SandboxEnvironmentSpec
83
83
  from inspect_ai.util._subtask import init_subtask
@@ -402,7 +402,13 @@ async def task_run(options: TaskRunOptions) -> EvalLog:
402
402
  view_notify_eval(logger.location)
403
403
 
404
404
  try:
405
- await send_telemetry("eval_log", eval_log_json_str(eval_log))
405
+ if (
406
+ await send_telemetry("eval_log_location", eval_log.location)
407
+ == "not_handled"
408
+ ):
409
+ # Converting the eval log to JSON is expensive. Only do so if
410
+ # eval_log_location was not handled.
411
+ await send_telemetry("eval_log", eval_log_json_str(eval_log))
406
412
  except Exception as ex:
407
413
  py_logger.warning(
408
414
  f"Error occurred sending telemetry: {exception_message(ex)}"
@@ -650,7 +656,7 @@ async def task_run_sample(
650
656
  )
651
657
 
652
658
  # capture most recent state for scoring
653
- state = sample_state() or state
659
+ state = ex.state or sample_state() or state
654
660
  state.completed = True
655
661
 
656
662
  except BaseException as ex:
@@ -734,7 +740,7 @@ async def task_run_sample(
734
740
  error = handle_error(ex)
735
741
 
736
742
  # handle sandboxenv init errors
737
- except BaseException as ex:
743
+ except Exception as ex:
738
744
  error = handle_error(ex)
739
745
 
740
746
  # complete the sample
inspect_ai/_util/hooks.py CHANGED
@@ -17,19 +17,29 @@ from .error import PrerequisiteError
17
17
  #
18
18
  # Telemetry can be optionally enabled by setting an INSPECT_TELEMETRY
19
19
  # environment variable that points to a function in a package which
20
- # conforms to the TelemetrySend signature below.
20
+ # conforms to the TelemetrySend signature below. A return value of True
21
+ # indicates that the telemetry event was handled.
21
22
 
22
- # There are currently two types of telemetry sent:
23
- # - model_usage (type ModelUsage)
24
- # - eval_log (type EvalLog)
23
+ # There are currently three types of telemetry sent:
24
+ # - model_usage (JSON string of the model usage)
25
+ # - eval_log_location (file path or URL string of the eval log)
26
+ # - eval_log (JSON string of the eval log)
27
+ # [only sent if eval_log_location unhandled]
28
+ # The eval_log_location type is preferred over eval_log as it means we can take
29
+ # advantage of the .eval format and avoid loading the whole log into memory.
25
30
 
26
- TelemetrySend = Callable[[str, str], Awaitable[None]]
31
+ TelemetrySend = Callable[[str, str], Awaitable[bool]]
27
32
 
28
33
 
29
- async def send_telemetry(type: Literal["model_usage", "eval_log"], json: str) -> None:
34
+ async def send_telemetry(
35
+ type: Literal["model_usage", "eval_log", "eval_log_location"], json: str
36
+ ) -> Literal["handled", "not_handled", "no_subscribers"]:
30
37
  global _send_telemetry
31
38
  if _send_telemetry:
32
- await _send_telemetry(type, json)
39
+ if await _send_telemetry(type, json):
40
+ return "handled"
41
+ return "not_handled"
42
+ return "no_subscribers"
33
43
 
34
44
 
35
45
  _send_telemetry: TelemetrySend | None = None
@@ -26,7 +26,7 @@
26
26
  },
27
27
  "dependencies": {
28
28
  "@popperjs/core": "^2.11.8",
29
- "asciinema-player": "^3.8.1",
29
+ "asciinema-player": "^3.8.2",
30
30
  "bootstrap": "^5.3.3",
31
31
  "bootstrap-icons": "^1.11.3",
32
32
  "clipboard": "^2.0.11",
@@ -72,8 +72,8 @@ class ChatMessageUser(ChatMessageBase):
72
72
  role: Literal["user"] = Field(default="user")
73
73
  """Conversation role."""
74
74
 
75
- tool_call_id: str | None = Field(default=None)
76
- """ID of tool call this message has the content payload for."""
75
+ tool_call_id: list[str] | None = Field(default=None)
76
+ """ID(s) of tool call(s) this message has the content payload for."""
77
77
 
78
78
 
79
79
  class ChatMessageAssistant(ChatMessageBase):
@@ -33,7 +33,6 @@ from inspect_ai._util.trace import trace_action
33
33
  from inspect_ai.tool import Tool, ToolChoice, ToolFunction, ToolInfo
34
34
  from inspect_ai.tool._tool_def import ToolDef, tool_defs
35
35
  from inspect_ai.util import concurrency
36
- from inspect_ai.util._limit import SampleLimitExceededError
37
36
 
38
37
  from ._cache import CacheEntry, CachePolicy, cache_fetch, cache_store
39
38
  from ._call_tools import disable_parallel_tools, tool_call_view, tools_info
@@ -764,40 +763,104 @@ def resolve_tool_model_input(
764
763
  def tool_result_images_as_user_message(
765
764
  messages: list[ChatMessage],
766
765
  ) -> list[ChatMessage]:
767
- return functools.reduce(tool_result_images_reducer, messages, [])
766
+ """
767
+ To conform to models lacking support for images in tool responses, create an alternate message history that moves images into a fabricated user message.
768
+
769
+ Tool responses will have images replaced with "Image content is included below.", and the new user message will contain the images.
770
+ """
771
+ init_accum: ImagesAccumulator = ([], [], [])
772
+ chat_messages, user_message_content, tool_call_ids = functools.reduce(
773
+ tool_result_images_reducer, messages, init_accum
774
+ )
775
+ # if the last message was a tool result, we may need to flush the pending stuff here
776
+ return maybe_adding_user_message(chat_messages, user_message_content, tool_call_ids)
777
+
778
+
779
+ ImagesAccumulator = tuple[list[ChatMessage], list[Content], list[str]]
780
+ """
781
+ ImagesAccumulator is a tuple containing three lists:
782
+ - The first list contains ChatMessages that are the result of processing.
783
+ - The second list contains ContentImages that need to be inserted into a fabricated user message.
784
+ - The third list contains the tool_call_id's associated with the tool responses.
785
+ """
768
786
 
769
787
 
770
788
  def tool_result_images_reducer(
771
- messages: list[ChatMessage],
789
+ accum: ImagesAccumulator,
772
790
  message: ChatMessage,
773
- ) -> list[ChatMessage]:
791
+ ) -> ImagesAccumulator:
792
+ messages, pending_content, tool_call_ids = accum
774
793
  # if there are tool result images, pull them out into a ChatUserMessage
775
- if isinstance(message, ChatMessageTool) and isinstance(message.content, list):
776
- tool_message = ChatMessageTool(
777
- content=message.content.copy(),
778
- tool_call_id=message.tool_call_id,
779
- function=message.function,
794
+ if (
795
+ isinstance(message, ChatMessageTool)
796
+ and isinstance(message.content, list)
797
+ and any([isinstance(c, ContentImage) for c in message.content])
798
+ ):
799
+ init_accum: ImageContentAccumulator = ([], [])
800
+ new_user_message_content, edited_tool_message_content = functools.reduce(
801
+ tool_result_image_content_reducer, message.content, init_accum
780
802
  )
781
- assert isinstance(tool_message.content, list)
782
- messages.append(tool_message)
783
-
784
- user_content: list[Content] = []
785
- for i in range(0, len(tool_message.content)):
786
- if isinstance(tool_message.content[i], ContentImage):
787
- user_content.append(message.content[i])
788
- tool_message.content[i] = ContentText(
789
- text="Image content is in the message below."
803
+
804
+ return (
805
+ messages
806
+ + [
807
+ ChatMessageTool(
808
+ content=edited_tool_message_content,
809
+ tool_call_id=message.tool_call_id,
810
+ function=message.function,
790
811
  )
791
- if len(user_content) > 0:
792
- messages.append(
793
- ChatMessageUser(content=user_content, tool_call_id=message.tool_call_id)
794
- )
812
+ ],
813
+ pending_content + new_user_message_content,
814
+ tool_call_ids + ([message.tool_call_id] if message.tool_call_id else []),
815
+ )
795
816
 
796
817
  else:
797
- messages.append(message)
818
+ return (
819
+ maybe_adding_user_message(messages, pending_content, tool_call_ids)
820
+ + [message],
821
+ [],
822
+ [],
823
+ )
798
824
 
799
- # return messages
800
- return messages
825
+
826
+ ImageContentAccumulator = tuple[list[Content], list[Content]]
827
+ """
828
+ ImageContentAccumulator is a tuple containing two lists of Content objects:
829
+ - The first list contains ContentImages that will be included in a fabricated user message.
830
+ - The second list contains modified content for the tool message with images replaced with text.
831
+ """
832
+
833
+
834
+ def tool_result_image_content_reducer(
835
+ acc: ImageContentAccumulator, content: Content
836
+ ) -> ImageContentAccumulator:
837
+ """
838
+ Reduces the messages Content into two separate lists: one for a fabricated user message that will contain the images and one for modified tool message with the images replaced with text.
839
+
840
+ Returns:
841
+ ImageContentReducer: A tuple containing two lists of Content objects.
842
+ - The first list contains the images that will be included in a fabricated user message.
843
+ - The second list contains modified content for the tool message with images replaced with text.
844
+ """
845
+ new_user_message_content, edited_tool_message_content = acc
846
+ if isinstance(content, ContentImage):
847
+ return new_user_message_content + [content], edited_tool_message_content + [
848
+ ContentText(text="Image content is included below.")
849
+ ]
850
+
851
+ else:
852
+ return new_user_message_content, edited_tool_message_content + [content]
853
+
854
+
855
+ def maybe_adding_user_message(
856
+ messages: list[ChatMessage], content: list[Content], tool_call_ids: list[str]
857
+ ) -> list[ChatMessage]:
858
+ """If content is empty, return messages, otherwise, create a new ChatMessageUser with it and return a new messages list with that message added."""
859
+ return (
860
+ messages + [ChatMessageUser(content=content, tool_call_id=tool_call_ids)]
861
+ if content
862
+ else messages
863
+ )
801
864
 
802
865
 
803
866
  # Functions to reduce consecutive user messages to a single user message -> required for some models
@@ -884,6 +947,7 @@ def handle_sample_message_limit(input: str | list[ChatMessage]) -> None:
884
947
  active_sample_message_limit,
885
948
  set_active_sample_total_messages,
886
949
  )
950
+ from inspect_ai.solver._limit import SampleLimitExceededError
887
951
 
888
952
  total_messages = 1 if isinstance(input, str) else len(input)
889
953
  message_limit = active_sample_message_limit()
@@ -910,6 +974,7 @@ def record_model_usage(model: str, usage: ModelUsage) -> None:
910
974
  active_sample_token_limit,
911
975
  set_active_sample_total_tokens,
912
976
  )
977
+ from inspect_ai.solver._limit import SampleLimitExceededError
913
978
 
914
979
  # record usage
915
980
  set_model_usage(model, usage, sample_model_usage_context_var.get(None))
@@ -291,9 +291,6 @@ class AnthropicAPI(ModelAPI):
291
291
  elif "content filtering" in error:
292
292
  content = "Sorry, but I am unable to help with that request."
293
293
  stop_reason = "content_filter"
294
- else:
295
- content = error
296
- stop_reason = "unknown"
297
294
 
298
295
  if content and stop_reason:
299
296
  return ModelOutput.from_content(
@@ -6,6 +6,7 @@ from ._chain import chain
6
6
  from ._critique import self_critique
7
7
  from ._fork import fork
8
8
  from ._human_agent.agent import human_agent
9
+ from ._limit import SampleLimitExceededError
9
10
  from ._multiple_choice import MultipleChoiceTemplate, multiple_choice
10
11
  from ._plan import Plan, plan
11
12
  from ._prompt import chain_of_thought, prompt_template, system_message, user_message
@@ -37,6 +38,7 @@ __all__ = [
37
38
  "TaskState",
38
39
  "Generate",
39
40
  "MultipleChoiceTemplate",
41
+ "SampleLimitExceededError",
40
42
  ]
41
43
 
42
44
 
@@ -13,6 +13,7 @@ from inspect_ai.solver._chain import chain
13
13
  from inspect_ai.tool._tool import Tool, ToolResult, tool
14
14
  from inspect_ai.tool._tool_with import tool_with
15
15
 
16
+ from ._limit import SampleLimitExceededError
16
17
  from ._prompt import system_message
17
18
  from ._solver import Generate, Solver, solver
18
19
  from ._task_state import TaskState
@@ -167,61 +168,70 @@ def basic_agent(
167
168
  # track attempts
168
169
  attempts = 0
169
170
 
170
- # main loop (state.completed checks message_limit and token_limit)
171
- while not state.completed:
172
- # generate output and append assistant message
173
- state.output = await get_model().generate(
174
- input=state.messages, tools=state.tools, cache=cache
175
- )
176
- state.messages.append(state.output.message)
177
-
178
- # check for context window overflow
179
- if state.output.stop_reason == "model_length":
180
- from inspect_ai.log._transcript import transcript
181
-
182
- transcript().info("Agent terminated: model context window exceeded")
183
- break
184
-
185
- # resolve tools calls (if any)
186
- if state.output.message.tool_calls:
187
- # call tool functions
188
- tool_results = await call_tools(
189
- state.output.message, state.tools, max_output=max_tool_output
171
+ try:
172
+ # main loop (state.completed checks message_limit and token_limit)
173
+ while not state.completed:
174
+ # generate output and append assistant message
175
+ state.output = await get_model().generate(
176
+ input=state.messages, tools=state.tools, cache=cache
190
177
  )
191
- state.messages.extend(tool_results)
192
-
193
- # was an answer submitted?
194
- answer = submission(tool_results)
195
- if answer:
196
- # set the output to the answer for scoring
197
- state.output.completion = answer
198
-
199
- # exit if we are at max_attempts
200
- attempts += 1
201
- if attempts >= max_attempts:
202
- state.completed = True
203
- break
204
-
205
- # exit if the submission is successful
206
- answer_scores = await score(state)
207
- if score_value_fn(answer_scores[0].value) == 1.0:
208
- state.completed = True
209
- break
210
-
211
- # otherwise notify the model that it was incorrect and continue
212
- else:
213
- response_message = (
214
- incorrect_message(state, answer_scores)
215
- if callable(incorrect_message)
216
- else incorrect_message
217
- )
218
- state.messages.append(
219
- ChatMessageUser(content=response_message)
220
- )
221
-
222
- # no tool calls, urge the model to continue
223
- else:
224
- state.messages.append(ChatMessageUser(content=continue_message))
178
+ state.messages.append(state.output.message)
179
+
180
+ # check for context window overflow
181
+ if state.output.stop_reason == "model_length":
182
+ from inspect_ai.log._transcript import transcript
183
+
184
+ transcript().info(
185
+ "Agent terminated: model context window exceeded"
186
+ )
187
+ break
188
+
189
+ # resolve tools calls (if any)
190
+ if state.output.message.tool_calls:
191
+ # call tool functions
192
+ tool_results = await call_tools(
193
+ state.output.message,
194
+ state.tools,
195
+ max_output=max_tool_output,
196
+ )
197
+ state.messages.extend(tool_results)
198
+
199
+ # was an answer submitted?
200
+ answer = submission(tool_results)
201
+ if answer:
202
+ # set the output to the answer for scoring
203
+ state.output.completion = answer
204
+
205
+ # exit if we are at max_attempts
206
+ attempts += 1
207
+ if attempts >= max_attempts:
208
+ state.completed = True
209
+ break
210
+
211
+ # exit if the submission is successful
212
+ answer_scores = await score(state)
213
+ if score_value_fn(answer_scores[0].value) == 1.0:
214
+ state.completed = True
215
+ break
216
+
217
+ # otherwise notify the model that it was incorrect and continue
218
+ else:
219
+ response_message = (
220
+ incorrect_message(state, answer_scores)
221
+ if callable(incorrect_message)
222
+ else incorrect_message
223
+ )
224
+ state.messages.append(
225
+ ChatMessageUser(content=response_message)
226
+ )
227
+
228
+ # no tool calls, urge the model to continue
229
+ else:
230
+ state.messages.append(ChatMessageUser(content=continue_message))
231
+
232
+ # propagate current state along with sample limit exceeded
233
+ except SampleLimitExceededError as ex:
234
+ raise ex.with_state(state)
225
235
 
226
236
  return state
227
237
 
@@ -1,5 +1,7 @@
1
1
  from typing import Literal
2
2
 
3
+ from ._task_state import TaskState
4
+
3
5
 
4
6
  class SampleLimitExceededError(Exception):
5
7
  """Exception raised when a sample limit is exceeded.
@@ -18,9 +20,20 @@ class SampleLimitExceededError(Exception):
18
20
  value: int,
19
21
  limit: int,
20
22
  message: str | None = None,
23
+ state: TaskState | None = None,
21
24
  ) -> None:
22
25
  self.type = type
23
26
  self.value = value
24
27
  self.limit = limit
25
28
  self.message = f"Exceeded {type} limit: {limit:,}"
29
+ self.state = state
26
30
  super().__init__(message)
31
+
32
+ def with_state(self, state: TaskState) -> "SampleLimitExceededError":
33
+ return SampleLimitExceededError(
34
+ self.type,
35
+ value=self.value,
36
+ limit=self.limit,
37
+ message=self.message,
38
+ state=state,
39
+ )
@@ -22,7 +22,6 @@ from inspect_ai.scorer._metric import Score
22
22
  from inspect_ai.scorer._target import Target
23
23
  from inspect_ai.tool import Tool, ToolChoice
24
24
  from inspect_ai.tool._tool_def import ToolDef
25
- from inspect_ai.util._limit import SampleLimitExceededError
26
25
  from inspect_ai.util._store import Store, store_jsonable
27
26
  from inspect_ai.util._store_model import SMT
28
27
 
@@ -173,7 +172,7 @@ class TaskState:
173
172
  self.metadata = metadata
174
173
  """Metadata from the `Sample` for this `TaskState`"""
175
174
 
176
- self._messages: list[ChatMessage] = ChatMessageList(messages)
175
+ self._messages: list[ChatMessage] = ChatMessageList(messages, self)
177
176
  """
178
177
  Chat conversation history for sample.
179
178
 
@@ -272,7 +271,7 @@ class TaskState:
272
271
  @messages.setter
273
272
  def messages(self, messages: list[ChatMessage]) -> None:
274
273
  """Set messages in chat history."""
275
- self._messages = ChatMessageList(messages)
274
+ self._messages = ChatMessageList(messages, self)
276
275
 
277
276
  @property
278
277
  def max_messages(self) -> int | None:
@@ -319,8 +318,32 @@ class TaskState:
319
318
 
320
319
  @property
321
320
  def completed(self) -> bool:
322
- """Is the task completed."""
323
- return self._completed
321
+ """Is the task completed.
322
+
323
+ Additionally, checks message and token limits and raises if they are exceeded.
324
+ """
325
+ from inspect_ai.log._samples import set_active_sample_total_messages
326
+
327
+ from ._limit import SampleLimitExceededError
328
+
329
+ # update messages
330
+ set_active_sample_total_messages(len(self.messages))
331
+
332
+ if self._completed:
333
+ return True
334
+ elif self.message_limit and len(self.messages) >= self.message_limit:
335
+ raise SampleLimitExceededError(
336
+ "message",
337
+ value=len(self.messages),
338
+ limit=self.message_limit,
339
+ state=self,
340
+ )
341
+ elif self.token_limit and self.token_usage >= self.token_limit:
342
+ raise SampleLimitExceededError(
343
+ "token", value=self.token_usage, limit=self.token_limit, state=self
344
+ )
345
+ else:
346
+ return self._completed
324
347
 
325
348
  @completed.setter
326
349
  def completed(self, completed: bool) -> None:
@@ -403,7 +426,8 @@ def sample_jsonable(sample: Sample) -> dict[str, Any]:
403
426
 
404
427
 
405
428
  class ChatMessageList(list[ChatMessage]):
406
- def __init__(self, iterable: Iterable[ChatMessage]):
429
+ def __init__(self, iterable: Iterable[ChatMessage], parent_state: TaskState):
430
+ self.parent_state = parent_state
407
431
  items, length = self._iterable_length(iterable)
408
432
  self._check_size(length)
409
433
  super().__init__(items)
@@ -411,12 +435,18 @@ class ChatMessageList(list[ChatMessage]):
411
435
  def _check_size(self, additional_items: int = 1) -> None:
412
436
  from inspect_ai.log._samples import active_sample_message_limit
413
437
 
438
+ from ._limit import SampleLimitExceededError
439
+
414
440
  messages_limit = active_sample_message_limit()
415
441
  if messages_limit is not None:
416
442
  messages = len(self) + additional_items
417
443
  if messages > messages_limit:
418
444
  raise SampleLimitExceededError(
419
- "message", value=messages, limit=messages_limit
445
+ "message",
446
+ value=messages,
447
+ limit=messages_limit,
448
+ message=None,
449
+ state=self.parent_state,
420
450
  )
421
451
 
422
452
  def append(self, item: ChatMessage) -> None:
@@ -345,7 +345,9 @@ async def web_browser_cmd(cmd: str, *args: str) -> str:
345
345
  if sandbox_env:
346
346
  store = store_as(WebBrowserStore)
347
347
  if not store.session_id:
348
- result = await sandbox_env.exec(["python3", WEB_CLIENT_NEW_SESSION])
348
+ result = await sandbox_env.exec(
349
+ ["python3", WEB_CLIENT_NEW_SESSION], timeout=180
350
+ )
349
351
 
350
352
  if not result.success:
351
353
  raise RuntimeError(
@@ -33,8 +33,6 @@ RUN apt-get update && \
33
33
 
34
34
  # Userland apt-get'able apps
35
35
  RUN apt-get install -y --no-install-recommends \
36
- # A simple image viewer.
37
- xpaint \
38
36
  # A calculator application.
39
37
  galculator && \
40
38
  apt-get clean
@@ -78,7 +76,7 @@ RUN useradd -m -s /bin/bash -d $HOME $USERNAME
78
76
  RUN echo "${USERNAME} ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
79
77
  USER ${USERNAME}
80
78
  WORKDIR $HOME
81
- COPY --chown=$USERNAME:$USERNAME image_home_dir/ $HOME
79
+ ADD --chown=$USERNAME:$USERNAME image_home_dir/ $HOME
82
80
 
83
81
  # configure Firefox to skip all 'first run' UI
84
82
  RUN mkdir -p $HOME/.mozilla/firefox-esr/profile.default && \
@@ -5,7 +5,7 @@ echo "starting vnc"
5
5
  -forever \
6
6
  -shared \
7
7
  -wait 50 \
8
- -cursor most \
8
+ -multiptr \
9
9
  -cursor arrow \
10
10
  -rfbport 5900 \
11
11
  -nopw \
@@ -0,0 +1,10 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+
3
+ <channel name="xfce4-screensaver" version="1.0">
4
+ <property name="saver" type="empty">
5
+ <property name="mode" type="int" value="0" />
6
+ </property>
7
+ <property name="lock" type="empty">
8
+ <property name="enabled" type="bool" value="false" />
9
+ </property>
10
+ </channel>
@@ -3,7 +3,6 @@ from inspect_ai._util.trace import trace_action, trace_message
3
3
  from ._concurrency import concurrency
4
4
  from ._console import input_screen
5
5
  from ._display import DisplayType, display_type
6
- from ._limit import SampleLimitExceededError
7
6
  from ._panel import InputPanel, input_panel
8
7
  from ._resource import resource
9
8
  from ._sandbox import (
@@ -37,7 +36,6 @@ __all__ = [
37
36
  "input_panel",
38
37
  "input_screen",
39
38
  "OutputLimitExceededError",
40
- "SampleLimitExceededError",
41
39
  "resource",
42
40
  "subprocess",
43
41
  "SandboxEnvironment",
@@ -32,6 +32,7 @@ async def self_check(sandbox_env: SandboxEnvironment) -> dict[str, bool | str]:
32
32
  for fn in [
33
33
  test_read_and_write_file_text,
34
34
  test_read_and_write_file_binary,
35
+ test_write_file_text_utf,
35
36
  test_read_and_write_file_including_directory_absolute,
36
37
  test_read_and_write_file_including_directory_relative,
37
38
  test_read_file_zero_length,
@@ -64,33 +65,39 @@ async def self_check(sandbox_env: SandboxEnvironment) -> dict[str, bool | str]:
64
65
 
65
66
 
66
67
  async def _cleanup_file(sandbox_env: SandboxEnvironment, filename: str) -> None:
67
- res = await sandbox_env.exec(["rm", filename])
68
+ res = await sandbox_env.exec(["rm", "-f", "--", filename])
68
69
  assert res.success
69
70
 
70
71
 
71
72
  async def test_read_and_write_file_text(sandbox_env: SandboxEnvironment) -> None:
72
- await sandbox_env.write_file(
73
- "test_read_and_write_file_text.file", "great #content\nincluding newlines"
74
- )
75
- written_file_string = await sandbox_env.read_file(
76
- "test_read_and_write_file_text.file", text=True
77
- )
73
+ file_name = "test_read_and_write_file_text.file"
74
+ await sandbox_env.write_file(file_name, "great #content\nincluding newlines")
75
+ written_file_string = await sandbox_env.read_file(file_name, text=True)
78
76
  assert "great #content\nincluding newlines" == written_file_string, (
79
77
  f"unexpected content: [{written_file_string}]"
80
78
  )
81
- await _cleanup_file(sandbox_env, "test_read_and_write_file_text.file")
79
+ await _cleanup_file(sandbox_env, file_name)
80
+
81
+
82
+ async def test_write_file_text_utf(sandbox_env: SandboxEnvironment) -> None:
83
+ utf_content = "✨☽︎✨🌞︎︎✨🚀✨"
84
+ file_name = "test_write_file_text_utf.file"
85
+ await sandbox_env.write_file(file_name, utf_content)
86
+ file_with_utf_content = await sandbox_env.read_file(file_name, text=True)
87
+ assert isinstance(file_with_utf_content, str)
88
+ assert file_with_utf_content == utf_content
89
+ await _cleanup_file(sandbox_env, file_name)
82
90
 
83
91
 
84
92
  async def test_read_and_write_file_binary(sandbox_env: SandboxEnvironment) -> None:
93
+ file_name = "test_read_and_write_file_binary.file"
85
94
  await sandbox_env.write_file(
86
- "test_read_and_write_file_binary.file", b"\xc3\x28"
95
+ file_name, b"\xc3\x28"
87
96
  ) # invalid UTF-8 from https://stackoverflow.com/a/17199164/116509
88
97
 
89
- written_file_bytes = await sandbox_env.read_file(
90
- "test_read_and_write_file_binary.file", text=False
91
- )
98
+ written_file_bytes = await sandbox_env.read_file(file_name, text=False)
92
99
  assert b"\xc3\x28" == written_file_bytes
93
- await _cleanup_file(sandbox_env, "test_read_and_write_file_binary.file")
100
+ await _cleanup_file(sandbox_env, file_name)
94
101
 
95
102
 
96
103
  async def test_read_and_write_file_including_directory_absolute(
@@ -101,6 +108,7 @@ async def test_read_and_write_file_including_directory_absolute(
101
108
  written_file_string = await sandbox_env.read_file(file_name, text=True)
102
109
  assert "absolutely enjoying being in a directory" == written_file_string
103
110
  await _cleanup_file(sandbox_env, file_name)
111
+ await sandbox_env.exec(["rmdir", "/tmp/test_rw_including_directory_absolute"])
104
112
 
105
113
 
106
114
  async def test_read_and_write_file_including_directory_relative(
@@ -111,20 +119,23 @@ async def test_read_and_write_file_including_directory_relative(
111
119
  written_file_string = await sandbox_env.read_file(file_name, text=True)
112
120
  assert "relatively enjoying being in a directory" == written_file_string
113
121
  await _cleanup_file(sandbox_env, file_name)
122
+ await sandbox_env.exec(["rmdir", "test_rw_including_directory_relative"])
114
123
 
115
124
 
116
125
  async def test_read_file_zero_length(sandbox_env: SandboxEnvironment) -> None:
117
- await sandbox_env.exec(["touch", "zero_length_file.file"])
118
- zero_length = await sandbox_env.read_file("zero_length_file.file", text=True)
126
+ file_name = "zero_length_file.file"
127
+ await sandbox_env.exec(["touch", file_name])
128
+ zero_length = await sandbox_env.read_file(file_name, text=True)
119
129
  assert isinstance(zero_length, str)
120
130
  assert zero_length == ""
131
+ await _cleanup_file(sandbox_env, file_name)
121
132
 
122
133
 
123
134
  async def test_read_file_not_found(sandbox_env: SandboxEnvironment) -> None:
124
- file = "nonexistent"
135
+ file_name = "nonexistent"
125
136
  with Raises(FileNotFoundError) as e_info:
126
- await sandbox_env.read_file(file, text=True)
127
- assert file in str(e_info.value)
137
+ await sandbox_env.read_file(file_name, text=True)
138
+ assert file_name in str(e_info.value)
128
139
 
129
140
 
130
141
  async def test_read_file_not_allowed(sandbox_env: SandboxEnvironment) -> None:
@@ -134,22 +145,23 @@ async def test_read_file_not_allowed(sandbox_env: SandboxEnvironment) -> None:
134
145
  with Raises(PermissionError) as e_info:
135
146
  await sandbox_env.read_file(file_name, text=True)
136
147
  assert file_name in str(e_info.value)
148
+ await sandbox_env.exec(["chmod", "+r", file_name])
137
149
  await _cleanup_file(sandbox_env, file_name)
138
150
 
139
151
 
140
152
  async def test_read_file_is_directory(sandbox_env: SandboxEnvironment) -> None:
141
- file = "/etc"
153
+ file_name = "/etc"
142
154
  with Raises(IsADirectoryError) as e_info:
143
- await sandbox_env.read_file(file, text=True)
155
+ await sandbox_env.read_file(file_name, text=True)
144
156
  assert "directory" in str(e_info.value)
145
157
 
146
158
 
147
159
  async def test_read_file_nonsense_name(
148
160
  sandbox_env: SandboxEnvironment,
149
161
  ) -> None:
150
- file = "https:/en.wikipedia.org/wiki/Bart%C5%82omiej_Kasprzykowski"
162
+ file_name = "https:/en.wikipedia.org/wiki/Bart%C5%82omiej_Kasprzykowski"
151
163
  with Raises(FileNotFoundError) as e_info:
152
- await sandbox_env.read_file(file, text=True)
164
+ await sandbox_env.read_file(file_name, text=True)
153
165
  assert "wikipedia" in str(e_info.value)
154
166
 
155
167
 
@@ -159,24 +171,28 @@ async def test_read_file_limit(sandbox_env: SandboxEnvironment) -> None:
159
171
  # Patch limit down to 1KiB for the test to save us from writing a 100 MiB file.
160
172
  with mock.patch.object(SandboxEnvironmentLimits, "MAX_READ_FILE_SIZE", 1024):
161
173
  with Raises(OutputLimitExceededError) as e_info:
162
- await sandbox_env.read_file("large.file", text=True)
174
+ await sandbox_env.read_file(file_name, text=True)
163
175
  assert "limit of 100 MiB was exceeded" in str(e_info.value)
164
176
  await _cleanup_file(sandbox_env, file_name)
165
177
 
166
178
 
167
179
  async def test_write_file_zero_length(sandbox_env: SandboxEnvironment) -> None:
168
- await sandbox_env.write_file("zero_length_file.file", "")
169
- zero_length = await sandbox_env.read_file("zero_length_file.file", text=True)
180
+ file_name = "zero_length_file.file"
181
+ await sandbox_env.write_file(file_name, "")
182
+ zero_length = await sandbox_env.read_file(file_name, text=True)
170
183
  assert isinstance(zero_length, str)
171
184
  assert zero_length == ""
185
+ await _cleanup_file(sandbox_env, file_name)
172
186
 
173
187
 
174
188
  async def test_write_file_space(sandbox_env: SandboxEnvironment) -> None:
175
- space = "✨☽︎✨🌞︎︎✨🚀✨"
176
- await sandbox_env.write_file("file with space.file", space)
177
- file_with_space = await sandbox_env.read_file("file with space.file", text=True)
189
+ space = "to the moon"
190
+ file_name = "file with space.file"
191
+ await sandbox_env.write_file(file_name, space)
192
+ file_with_space = await sandbox_env.read_file(file_name, text=True)
178
193
  assert isinstance(file_with_space, str)
179
194
  assert file_with_space == space
195
+ await _cleanup_file(sandbox_env, file_name)
180
196
 
181
197
 
182
198
  async def test_write_file_is_directory(
@@ -192,6 +208,9 @@ async def test_write_file_is_directory(
192
208
  "content cannot go in a directory, dummy",
193
209
  )
194
210
  assert "directory" in str(e_info.value)
211
+ await sandbox_env.exec(
212
+ ["rm", "-rf", "/tmp/inspect_ai_test_write_file_is_directory"]
213
+ )
195
214
 
196
215
 
197
216
  async def test_write_file_without_permissions(
@@ -203,6 +222,8 @@ async def test_write_file_without_permissions(
203
222
  with Raises(PermissionError) as e_info:
204
223
  await sandbox_env.write_file(file_name, "this won't stick")
205
224
  assert file_name in str(e_info.value)
225
+ await sandbox_env.exec(["chmod", "+w", file_name])
226
+ await _cleanup_file(sandbox_env, file_name)
206
227
 
207
228
 
208
229
  async def test_write_file_exists(
@@ -213,6 +234,7 @@ async def test_write_file_exists(
213
234
  await sandbox_env.write_file(file_name, "altered content")
214
235
  altered_content = await sandbox_env.read_file(file_name, text=True)
215
236
  assert altered_content == "altered content"
237
+ await _cleanup_file(sandbox_env, file_name)
216
238
 
217
239
 
218
240
  async def test_exec_output(sandbox_env: SandboxEnvironment) -> None:
@@ -305,6 +327,7 @@ async def test_cwd_absolute(sandbox_env: SandboxEnvironment) -> None:
305
327
  current_dir_contents = (await sandbox_env.exec(["ls"], cwd=cwd_directory)).stdout
306
328
  assert "test_cwd_absolute.file" in current_dir_contents
307
329
  await _cleanup_file(sandbox_env, file_name)
330
+ await sandbox_env.exec(["rmdir", cwd_directory])
308
331
 
309
332
 
310
333
  async def test_exec_stdout_is_limited(sandbox_env: SandboxEnvironment) -> None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: inspect_ai
3
- Version: 0.3.60
3
+ Version: 0.3.61
4
4
  Summary: Framework for large language model evaluations
5
5
  Author: UK AI Safety Institute
6
6
  License: MIT License
@@ -68,7 +68,7 @@ Requires-Dist: pytest-asyncio; extra == "dev"
68
68
  Requires-Dist: pytest-cov; extra == "dev"
69
69
  Requires-Dist: pytest-dotenv; extra == "dev"
70
70
  Requires-Dist: pytest-xdist; extra == "dev"
71
- Requires-Dist: ruff==0.9.3; extra == "dev"
71
+ Requires-Dist: ruff==0.9.4; extra == "dev"
72
72
  Requires-Dist: textual-dev>=0.86.2; extra == "dev"
73
73
  Requires-Dist: types-PyYAML; extra == "dev"
74
74
  Requires-Dist: types-beautifulsoup4; extra == "dev"
@@ -3,7 +3,7 @@ inspect_ai/__main__.py,sha256=oWX4YwDZDg3GS3-IG0yPGoSEOfSzWihELg7QmrUlxjM,67
3
3
  inspect_ai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  inspect_ai/_cli/cache.py,sha256=nOX9ysB3rZ-V8b_ryTpkgpoAynNlq4Op_fIqAIl4EVg,3910
5
5
  inspect_ai/_cli/common.py,sha256=5smokbnQGpOe72WXlpDy1QWuWbjxILKnl5UPvgfW0Yk,3678
6
- inspect_ai/_cli/eval.py,sha256=xxYGk5ewUUmGPIwJ1wp8TcCjxvttp5FxovhgsPfaFL0,31992
6
+ inspect_ai/_cli/eval.py,sha256=LW2KH4iENl5QF10ItTwHzHM7v26RPS7-1XevaQjSa5E,31968
7
7
  inspect_ai/_cli/info.py,sha256=d5W7VA5buinGcsdQjWqlsMM6iSNNGRUHQrr4JS2k8nk,1749
8
8
  inspect_ai/_cli/list.py,sha256=GreVEhJRpagiCpzVc3FSGhcdpTq9B8Jh--mfgs4ueFQ,2454
9
9
  inspect_ai/_cli/log.py,sha256=boSzHZkiabhnYWHLRVsZVENCryG-MDaNHIIbpSp0Mcs,5729
@@ -55,11 +55,11 @@ inspect_ai/_eval/task/__init__.py,sha256=yhBZbjh67QfHy7IdyFGMyQIfBflQLPLkhmz5eEv
55
55
  inspect_ai/_eval/task/constants.py,sha256=quAKMw-4-3xKd1T_KwXCZvHYoKRXt1ZGuaHbBcWJwnA,72
56
56
  inspect_ai/_eval/task/epochs.py,sha256=Ci7T6CQniSOTChv5Im2dCdSDrP-5hq19rV6iJ2uBcH8,700
57
57
  inspect_ai/_eval/task/error.py,sha256=gJnd8X7LHpPz5zcOq_az6ONZICGJ0_VpSz9yhF0yRyY,1233
58
- inspect_ai/_eval/task/generate.py,sha256=Edm-_6Wp1mkb7XpGkfTAqobWPfjJeWB0sb8-76UjNrc,1999
58
+ inspect_ai/_eval/task/generate.py,sha256=lm066fbZOX7o3NB57rbwwec-ZaIFE745fiuacPCmo20,2352
59
59
  inspect_ai/_eval/task/images.py,sha256=Tg3I7d7ThCYP_Lf-H5JA7xH-sH2W-m1c1YfswDwplt4,3949
60
60
  inspect_ai/_eval/task/log.py,sha256=TjyLglP-3gMMDkfmxYbsxTvGIWY9FEyVtB09Fyrm_PA,6166
61
61
  inspect_ai/_eval/task/results.py,sha256=GkWlgHexm0BPyxrmqLY6YSXz3AqXYMVLXNnKCYfW7ic,15785
62
- inspect_ai/_eval/task/run.py,sha256=rCV98hW3Pnz9Mu6L9h8QLs79Khy9D0lXiNyrR5qW1V4,34607
62
+ inspect_ai/_eval/task/run.py,sha256=FdTysq-2dMFEXWIs7m8ONbXXDabx9V2saJecBurlQKs,34894
63
63
  inspect_ai/_eval/task/rundir.py,sha256=QXetLfqi1lRo-PcIuu7maQpVO57c2ebnsjfZk0lsAFk,2001
64
64
  inspect_ai/_eval/task/sandbox.py,sha256=kwG1SQawZFDle3hzqMe4hSdnuvShkKsmMIrcC2gnYHM,6120
65
65
  inspect_ai/_eval/task/task.py,sha256=ahlM-0MJc_4wFCWTGQIsnapUn0epka_9jRVK-xdapHY,13800
@@ -83,7 +83,7 @@ inspect_ai/_util/file.py,sha256=49NXD2R_qVDMScBfifiHKhQ6ypB4OyX6cA3ym1k0-1U,1226
83
83
  inspect_ai/_util/format.py,sha256=RWmK4JcB7NwRy4rXtUa1JJ52_KhxcvREhMMCFVHvzwQ,1179
84
84
  inspect_ai/_util/git.py,sha256=nHCtZMLjMyFjSC_9bksBXeFz4xqxZfY6lfXr_qg2n1E,760
85
85
  inspect_ai/_util/hash.py,sha256=N25e4B_Lp9ZFSQIrtMO4x-KrZopJL6gKhs-NO41pxzA,289
86
- inspect_ai/_util/hooks.py,sha256=eIvQCc3L3_sPUPHY2tsv20IanmvOvoa7cAaDuP_aiYI,3780
86
+ inspect_ai/_util/hooks.py,sha256=8QnHCQQY_2XMYPkiPvixUgFY0E_niZvQhQDMI-eCdhM,4353
87
87
  inspect_ai/_util/html.py,sha256=X62FY8gpEJ2ZQoDu2y8aQAbiBUIHKsd7DA9rWCIleo8,168
88
88
  inspect_ai/_util/http.py,sha256=c4yvH48ZkETZ7sNDuNzBR0NUS4r-6WzCaolW9my13ns,3628
89
89
  inspect_ai/_util/images.py,sha256=W7QJHyzuXhfy3VsLhKTzddSo1g69O9RxnTyhat48Wyo,1312
@@ -127,7 +127,7 @@ inspect_ai/_view/www/favicon.svg,sha256=b9AHYZaO2zBzeKH6G4PwXZMGGW_UxY0omKHam-c9
127
127
  inspect_ai/_view/www/index.html,sha256=LDaPH75d5TlepHfIY3wQG0aBcHTpa90Bhx0er_ES45s,910
128
128
  inspect_ai/_view/www/jsconfig.json,sha256=vt1gPPYezOFeV9nofA93CmVJAKGb1QeKGuyvEn1CXgk,383
129
129
  inspect_ai/_view/www/log-schema.json,sha256=4PRhm3dJvhwPhEZCDUFMwzHcIsPNLGGPuuqXHIwSUxk,95006
130
- inspect_ai/_view/www/package.json,sha256=zQ4TrahqhBIsJAiiSQ--dW9KhH61-IBNTqtPRzS3tU4,1189
130
+ inspect_ai/_view/www/package.json,sha256=y2cHvK7QKQcVk2v66ldn-syN649xnAjTVHju4QFJY2s,1189
131
131
  inspect_ai/_view/www/postcss.config.cjs,sha256=jQ-QIJFuBVUTZXbFpOvUJk4MsLBr_yPOQbRwHD0ZohE,340
132
132
  inspect_ai/_view/www/tsconfig.json,sha256=JjaLdpt13sgJYHDWdThRIr0gHzpRzEOKL4E2awt9r9s,277
133
133
  inspect_ai/_view/www/vite.config.js,sha256=jmSUrV0YzGCcinfyKcmy2bDRUE10mmPUxMAen0bX8jY,1103
@@ -304,17 +304,17 @@ inspect_ai/log/_recorders/recorder.py,sha256=yvW_D99QxUQmnF5EiGsWIVetBXdssMUcsq5
304
304
  inspect_ai/model/__init__.py,sha256=gYBnBjfEEG_GQhu_lhwQ2tW9U4nXLW0GtRJNGfwYy3k,2121
305
305
  inspect_ai/model/_cache.py,sha256=IQXhMas58Pjdma894GHGTtHYpmnf_Ojz_eE0kHaQVPs,13567
306
306
  inspect_ai/model/_call_tools.py,sha256=Vy3uXVpHY9b0gQrXu3KGmvEOWXntSCxbpJ0elTAQ0So,18128
307
- inspect_ai/model/_chat_message.py,sha256=21x9MJVyAzKM_XO72X6fG6Ei1Fy8xklSdAgdmDS_RLU,4442
307
+ inspect_ai/model/_chat_message.py,sha256=rJ1_pBn0sQdsr4R_DQUrg2PvRzi4VaYc2N_ttikuFy0,4454
308
308
  inspect_ai/model/_conversation.py,sha256=SFumVE67sq-mgSfqaZw2xwE8kow5NxF6FU8VbXsvc8k,1988
309
309
  inspect_ai/model/_generate_config.py,sha256=WjlFH6WtfyIpF6TMcSFmIUxyyB0D4quZLIqMd82oEW8,8757
310
310
  inspect_ai/model/_image.py,sha256=kpO2Bn_-c-dK80HuPOPH1eSNmcoc39kofwf4yTTiTFE,477
311
- inspect_ai/model/_model.py,sha256=N8keDFLPXps-3O07GrPC1ZocjdOnaaNI6tbkUsN5clQ,34114
311
+ inspect_ai/model/_model.py,sha256=9rLk3F33By0Gc8d7_LdS9r4sTth11QxC5tGu3m_4suo,37020
312
312
  inspect_ai/model/_model_call.py,sha256=r6ObHZwm7jy1qX2qxvAP6iPV3BhdGThL-VH-QEawQhA,2017
313
313
  inspect_ai/model/_model_output.py,sha256=X6dEH3L4XPs5H8cWQeVF8tlkDMNRqP3StJlPA_z140E,7029
314
314
  inspect_ai/model/_openai.py,sha256=XhYu_Rdc5jLGkrgdIkbniNWlQVBx9iYj2DdDTK1U12o,12871
315
315
  inspect_ai/model/_registry.py,sha256=Cr2y32EqLnOqLbSWoXHVK4ivTTzCUhJuACxoTyPt8kY,2032
316
316
  inspect_ai/model/_render.py,sha256=rWypNUjgrH4NGp0r-ESAze9gZz7lYNjheEP438vRYZE,922
317
- inspect_ai/model/_providers/anthropic.py,sha256=OJPjOutTTkgMU54bfEIRVbnweGa4UwXsZgYkacYt20M,24752
317
+ inspect_ai/model/_providers/anthropic.py,sha256=4a07C0PGxWyyHGZuKol6EXULsDC7lphqyJgSdaOJ1gY,24674
318
318
  inspect_ai/model/_providers/azureai.py,sha256=moIC4-um_Qs3iXbr4DlP6LUL924aF-s5YyQqF0V5ye4,14037
319
319
  inspect_ai/model/_providers/bedrock.py,sha256=BiSEQYlGLKqaadGUJxSQuule3JPLZbAIjfhJ36DYQ8k,23357
320
320
  inspect_ai/model/_providers/cloudflare.py,sha256=h6ubjf0kxyMM7Aj2tm68tWa-2R7RAXNGp1O6KMvi0Gw,4143
@@ -359,16 +359,17 @@ inspect_ai/scorer/_reducer/__init__.py,sha256=ntoSXbbBia6gN3Uk3tQFQ8lSt8IBSRvwM5
359
359
  inspect_ai/scorer/_reducer/reducer.py,sha256=g8F7sTm_FmPcLdavOGv0YuvqZ5_nz2irmQVq37h2Y5A,11494
360
360
  inspect_ai/scorer/_reducer/registry.py,sha256=J2tvuuxf4jBC09_SCBZg99Qb2qQUWG8STEsw7ASWpXQ,5388
361
361
  inspect_ai/scorer/_reducer/types.py,sha256=uimvzIBRK7x1Dof77gsHYe9PU3hekB1opm9DTAa4sL4,340
362
- inspect_ai/solver/__init__.py,sha256=v3lps_q6SU4ZHklFjG-vgy92FcOK3jynk9zPs-nBwa4,3356
363
- inspect_ai/solver/_basic_agent.py,sha256=uJkjMsBP6SycnJxyXBOitU4AE8dBBCTKEbEZCz0NBuM,9607
362
+ inspect_ai/solver/__init__.py,sha256=I8lmfnohTUYyW3aR7sCQhkOBh9R75a0-QrYkR3hG76E,3433
363
+ inspect_ai/solver/_basic_agent.py,sha256=DaPMu_2SuoBamYwd54soxGaW2lcK21Siuf0qYW9Ps9w,10134
364
364
  inspect_ai/solver/_chain.py,sha256=F-2ZHE2KOlDAIgH1_Q23whUMH5TjYGvCHhcOgbRxe7I,2234
365
365
  inspect_ai/solver/_critique.py,sha256=ddO8J7VqSEsT6kofawpn3PrcUpLjLUMFmJi0hocDZpI,3504
366
366
  inspect_ai/solver/_fork.py,sha256=Ge1PwpCHjeZhm2CLAWKss2uFuQd9BGzVinLOW6UOnfE,2880
367
+ inspect_ai/solver/_limit.py,sha256=zaZseJgjbJaBnGdXQHQ5MpU4tzgUyD8FzLvJMGDk3jA,1122
367
368
  inspect_ai/solver/_multiple_choice.py,sha256=tSLrwqAHuvX_eccM6OXiRmlx5bx_3g1LcB8GDWWV9C0,11024
368
369
  inspect_ai/solver/_plan.py,sha256=Dp1DDTtGe2iTo8CYWKqCOdfBFfTK_0wi2JzIr6qrikI,7042
369
370
  inspect_ai/solver/_prompt.py,sha256=PwGtLQ-dnCzxN_74H1NDT7LAhUuuiw2-c6ZSyXgBFgQ,3953
370
371
  inspect_ai/solver/_solver.py,sha256=Q-FrkfD97_TufEzuQxzr_LgziCdQipIuy778NWq7vVM,9008
371
- inspect_ai/solver/_task_state.py,sha256=D2rpC7lycJH601o6xHNrF3LIWgMGFaXkQ1b_38pF-2U,15169
372
+ inspect_ai/solver/_task_state.py,sha256=Timv9_961yPNjh07BBUL0QeHeLLKx6b-QBsN1ocnEvY,16237
372
373
  inspect_ai/solver/_transcript.py,sha256=gkH9CC5gYbz7ZzrFD0TkjtKYjWxQP5EthJOkq8NXDOc,1049
373
374
  inspect_ai/solver/_use_tools.py,sha256=W7muO8r9eThXydm1GjFF-f6gip9AhzhgAG2GHSE5EpM,2011
374
375
  inspect_ai/solver/_util.py,sha256=pthrf-CzC6FnQYSUFLXTYM4wFEJptZrh5POTmV-Jtow,446
@@ -402,7 +403,7 @@ inspect_ai/tool/_tool_with.py,sha256=YBHjhT9PuM2QyUxD_BzhgqFPFfUPoRrTIpXMBXMXlFY
402
403
  inspect_ai/tool/_tools/_execute.py,sha256=DkFlvUTvI595H1zH5IKArhbyBo8YZWqq9tvoUMdvlaw,2823
403
404
  inspect_ai/tool/_tools/_web_search.py,sha256=YqZ3E65ssdq1X2NSH9Mqt5INXdPVQOdKa3PbKi7XjAY,7828
404
405
  inspect_ai/tool/_tools/_web_browser/__init__.py,sha256=dnnzy96pcvMvxD1OGg4hG-doL7Ru7WH0i25Sb9VIXwE,65
405
- inspect_ai/tool/_tools/_web_browser/_web_browser.py,sha256=PFBXaN18HNKslJzPlMgs_p2fNgPOClbjhYzD_qa_x9M,15541
406
+ inspect_ai/tool/_tools/_web_browser/_web_browser.py,sha256=Bzb5RtxlQuxGgbkhxyaUTaj6VdNClgePBl5IuX2JJhY,15584
406
407
  inspect_ai/tool/_tools/_web_browser/_resources/Dockerfile,sha256=Sh1Ht5oBuxZC_8hLzw877CIvM9me_8Q0MxMemR5E_js,431
407
408
  inspect_ai/tool/_tools/_web_browser/_resources/README.md,sha256=RAMe6uFUYepkPSqpdCuag0nqASuFEONDI7jOHagYfuI,2607
408
409
  inspect_ai/tool/_tools/_web_browser/_resources/accessibility_node.py,sha256=PuOOeF5rDjN9tz-kRQ_UZUXj7MzrjwuFEdhVIYcCcQw,9628
@@ -423,16 +424,16 @@ inspect_ai/tool/beta/_computer/__init__.py,sha256=fq4BSM4aDhtEtE4279xm47NiO6vyiZ
423
424
  inspect_ai/tool/beta/_computer/_common.py,sha256=6XK6MBu2ZiRCao_eMlZdjXEvTmbeKQRN0K-8MtBPsk4,4059
424
425
  inspect_ai/tool/beta/_computer/_computer.py,sha256=2R-3GLoSvQn8b0rVPur3jMzaRK4nS6i_sDwzicj5XJ8,7433
425
426
  inspect_ai/tool/beta/_computer/_computer_split.py,sha256=H3DVCJqpHp_2ra85W_z9s5r-oHTVWwctuEq5fDdy2T4,5588
426
- inspect_ai/tool/beta/_computer/_resources/Dockerfile,sha256=CsmxeL8nO58fzKKpjFaAKiMVyMcZxWVbLUQsqXOvKNo,3545
427
+ inspect_ai/tool/beta/_computer/_resources/Dockerfile,sha256=aLdS_UK2-bXiDQcfhGjVytPUQR1lb_WAQN8x4Ssa7_I,3498
427
428
  inspect_ai/tool/beta/_computer/_resources/README.md,sha256=5JDNaGJ-Ebq6Io57ANFIqgjPoh11aGDSrrgrhyfiqxU,1657
428
429
  inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh,sha256=IR8sE-b22YO7lwzdDiyjhLTJWIf0X__wA8WE98dwkwM,394
429
430
  inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh,sha256=PAbMgSvprnLvbj8A8a59o_yDfm-jiCXxBxsPb004Bf8,383
430
- inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh,sha256=NRTMEL5TTrEFjKK82ZMEqkHrhYIGIlos80CoElQgWoU,996
431
+ inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh,sha256=JFcW46u2ioDpGLptmUOMaqtt2YvuFhCTB42cyWRmo8c,993
431
432
  inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh,sha256=w_27I4o7usP8SUMzP3lrXeNuISslyy1aywkgpm_2l4Q,209
432
433
  inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh,sha256=hd2naWFFpm3S0cWZ6Lhlpm6KD3L6-g8Zw2dgxchXMUg,1118
434
+ inspect_ai/tool/beta/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml,sha256=jNgaNqBCngQlykTlLhmmdc_LLOrH2AMk7pUpLkbCjMY,312
433
435
  inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop,sha256=Odm77RSEiTlMx7cY8odUCO2a8fvIUwHcpEUanpHzbL0,181
434
436
  inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop,sha256=jYYu8pcdIhFCC_3cEgO-0z0A6eQO2WQkIVViebSBbpA,184
435
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop,sha256=T093gZ3B2aXNd0yo6J31rJ0HeE3ROXPCbgAWxZqtjDA,158
436
437
  inspect_ai/tool/beta/_computer/_resources/tool/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
437
438
  inspect_ai/tool/beta/_computer/_resources/tool/_logger.py,sha256=owkNYe9lyShTLXoMqhK4Qtzacnt5FvSxN8Wqf2MO5XM,652
438
439
  inspect_ai/tool/beta/_computer/_resources/tool/_run.py,sha256=xhXdnBK1di9muaO44CEirL9hpGy3NmKbjfMpyeVmn8Y,1595
@@ -440,12 +441,11 @@ inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py,sha256=cd6JNFhwyI
440
441
  inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py,sha256=rLduqd6JduoM9nMQk2-q7lpK4TCasz2F6_6mexquInI,9566
441
442
  inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py,sha256=0ehJuuUO6AlWUZKt3TyUbWQuwyBmkpsBbHxizZI_0GQ,2574
442
443
  inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
443
- inspect_ai/util/__init__.py,sha256=4I9qA1900NIJNYjnGtHFHVoXkWv89pO6h10ePQ2Ohe8,1465
444
+ inspect_ai/util/__init__.py,sha256=Nbr5h9EDqnUFqj1SSm5hJccHp_sz2YB1SCZgFS0NYDk,1388
444
445
  inspect_ai/util/_concurrency.py,sha256=Olzk259NPeSXIy5LzID_WEVGnaW6p5CBCBrmlZUYufM,2227
445
446
  inspect_ai/util/_console.py,sha256=V1XkIoKcNZo0SgRUOv15zJAWz6-zV6267hC4Oldj8oY,1237
446
447
  inspect_ai/util/_conversation.py,sha256=KzqvKfj1tB14cgARZjYyIVG2EpuE-EZKqLGAPIXv1Xs,784
447
448
  inspect_ai/util/_display.py,sha256=IUVyzS0PtFo9LeRW_EWvDv7tkGy1rsZGBjqg63uOPOs,1591
448
- inspect_ai/util/_limit.py,sha256=HMgembPprMvJFeFQy82Gw_BkPX4mqYBP1mGu-aA0n5g,751
449
449
  inspect_ai/util/_panel.py,sha256=bSXXV1LDVMt8DeGWEYTfEm3iMz9I02sX5xpBSVUVRdQ,3072
450
450
  inspect_ai/util/_resource.py,sha256=MMmtTKtt78pDIp9Uc_OxJom_q8mcKozVqt8kosKRJt0,3420
451
451
  inspect_ai/util/_store.py,sha256=QemJe2M-RK6zSFNcd07_92XFjvNtWKgHzBr5eT3KF1I,3786
@@ -459,7 +459,7 @@ inspect_ai/util/_sandbox/environment.py,sha256=Qo7ne28L6fn3igo2Gd0H1lz4vP60IdJGS
459
459
  inspect_ai/util/_sandbox/limits.py,sha256=K-GjKfSugOq8KP0wW_oF6qFrXsOnMV0C88QUWkjPJ9o,2164
460
460
  inspect_ai/util/_sandbox/local.py,sha256=NkHnR_e7s7RFsBdwfaSR7Yzp6lSUc7Em0Pc9_CFuN4c,3534
461
461
  inspect_ai/util/_sandbox/registry.py,sha256=mQwWwqzaCXF1FZ2fcVujpp3WMA35GWnh1w43SoIJAVM,2145
462
- inspect_ai/util/_sandbox/self_check.py,sha256=o-5skGZzzT1HCh9R6gf_D9J-PmCl0VRbriX4rqUjZ60,14101
462
+ inspect_ai/util/_sandbox/self_check.py,sha256=ZRb2wMRnyiUBJPJqTfLlUO2_ctxBoJ-4soyERfn583A,15222
463
463
  inspect_ai/util/_sandbox/service.py,sha256=2os7W8NYBDcaBoaHVfZ1YrI9hvldksmiwqkUYrCRCPo,11258
464
464
  inspect_ai/util/_sandbox/docker/cleanup.py,sha256=MK6UlADcWtTDotppeVJga2ibf9Ud-e4V-5ReoNbmhqg,4793
465
465
  inspect_ai/util/_sandbox/docker/compose.py,sha256=4aIWWTaTUY9ZWrfSynkRqrUbKlYWrRYoSDX9WrjdHFQ,11473
@@ -468,9 +468,9 @@ inspect_ai/util/_sandbox/docker/docker.py,sha256=sx4PNv_4PDuKlkeYV6ASaZbo0XT-I_V
468
468
  inspect_ai/util/_sandbox/docker/internal.py,sha256=fATyk2pdtjSl-D0VPT4dmkXV-gOc5HrPH0EQDW4IAJY,1446
469
469
  inspect_ai/util/_sandbox/docker/prereqs.py,sha256=0j6_OauBBnVlpBleADcZavIAAQZy4WewVjbRn9c0stg,3355
470
470
  inspect_ai/util/_sandbox/docker/util.py,sha256=pSPsRGymrTmTnEUHiHoQSNqeurPP1mL5kB-105O6EWo,2794
471
- inspect_ai-0.3.60.dist-info/LICENSE,sha256=aYPffOl9TwBXDQ8g33Jh6AsBhobb3A76qNm7r2HZsps,1079
472
- inspect_ai-0.3.60.dist-info/METADATA,sha256=xzklRJs-p-vYq21ksrYIVA2XCUvMVksqx5Gvtdpz6T8,4528
473
- inspect_ai-0.3.60.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
474
- inspect_ai-0.3.60.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
475
- inspect_ai-0.3.60.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
476
- inspect_ai-0.3.60.dist-info/RECORD,,
471
+ inspect_ai-0.3.61.dist-info/LICENSE,sha256=aYPffOl9TwBXDQ8g33Jh6AsBhobb3A76qNm7r2HZsps,1079
472
+ inspect_ai-0.3.61.dist-info/METADATA,sha256=8fUvCJ8CAPh0ANPl4OPuHGBmbhbewMoH6F1AA94SRTA,4528
473
+ inspect_ai-0.3.61.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
474
+ inspect_ai-0.3.61.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
475
+ inspect_ai-0.3.61.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
476
+ inspect_ai-0.3.61.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- [Desktop Entry]
2
- Version=1.0
3
- Type=Application
4
- Name=XPaint
5
- Comment=Xpaint painting application
6
- Exec=xpaint
7
- Icon=xpaint
8
- Path=
9
- Terminal=false
10
- StartupNotify=false