inspect-ai 0.3.71__py3-none-any.whl → 0.3.72__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  2. inspect_ai/_view/www/node_modules/flatted/python/test.py +63 -0
  3. inspect_ai/model/_providers/anthropic.py +148 -128
  4. inspect_ai/tool/_tools/_computer/_common.py +117 -58
  5. inspect_ai/tool/_tools/_computer/_computer.py +80 -57
  6. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +7 -1
  7. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml +91 -0
  8. inspect_ai/tool/_tools/_computer/_resources/tool/.pylintrc +8 -0
  9. inspect_ai/tool/_tools/_computer/_resources/tool/.vscode/settings.json +12 -0
  10. inspect_ai/tool/_tools/_computer/_resources/tool/_args.py +78 -0
  11. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +20 -0
  12. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +175 -113
  13. inspect_ai/tool/_tools/_computer/_resources/tool/computer_tool.py +76 -20
  14. inspect_ai/tool/_tools/_computer/_resources/tool/pyproject.toml +65 -0
  15. inspect_ai/tool/_tools/_computer/test_args.py +151 -0
  16. {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.72.dist-info}/METADATA +1 -1
  17. {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.72.dist-info}/RECORD +21 -13
  18. inspect_ai/tool/_tools/_computer/_computer_split.py +0 -198
  19. {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.72.dist-info}/LICENSE +0 -0
  20. {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.72.dist-info}/WHEEL +0 -0
  21. {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.72.dist-info}/entry_points.txt +0 -0
  22. {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.72.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,149 @@
1
+ # ISC License
2
+ #
3
+ # Copyright (c) 2018-2021, Andrea Giammarchi, @WebReflection
4
+ #
5
+ # Permission to use, copy, modify, and/or distribute this software for any
6
+ # purpose with or without fee is hereby granted, provided that the above
7
+ # copyright notice and this permission notice appear in all copies.
8
+ #
9
+ # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
10
+ # REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11
+ # AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
12
+ # INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13
+ # LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14
+ # OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15
+ # PERFORMANCE OF THIS SOFTWARE.
16
+
17
+ import json as _json
18
+
19
+ class _Known:
20
+ def __init__(self):
21
+ self.key = []
22
+ self.value = []
23
+
24
+ class _String:
25
+ def __init__(self, value):
26
+ self.value = value
27
+
28
+
29
+ def _array_keys(value):
30
+ keys = []
31
+ i = 0
32
+ for _ in value:
33
+ keys.append(i)
34
+ i += 1
35
+ return keys
36
+
37
+ def _object_keys(value):
38
+ keys = []
39
+ for key in value:
40
+ keys.append(key)
41
+ return keys
42
+
43
+ def _is_array(value):
44
+ return isinstance(value, list) or isinstance(value, tuple)
45
+
46
+ def _is_object(value):
47
+ return isinstance(value, dict)
48
+
49
+ def _is_string(value):
50
+ return isinstance(value, str)
51
+
52
+ def _index(known, input, value):
53
+ input.append(value)
54
+ index = str(len(input) - 1)
55
+ known.key.append(value)
56
+ known.value.append(index)
57
+ return index
58
+
59
+ def _loop(keys, input, known, output):
60
+ for key in keys:
61
+ value = output[key]
62
+ if isinstance(value, _String):
63
+ _ref(key, input[int(value.value)], input, known, output)
64
+
65
+ return output
66
+
67
+ def _ref(key, value, input, known, output):
68
+ if _is_array(value) and not value in known:
69
+ known.append(value)
70
+ value = _loop(_array_keys(value), input, known, value)
71
+ elif _is_object(value) and not value in known:
72
+ known.append(value)
73
+ value = _loop(_object_keys(value), input, known, value)
74
+
75
+ output[key] = value
76
+
77
+ def _relate(known, input, value):
78
+ if _is_string(value) or _is_array(value) or _is_object(value):
79
+ try:
80
+ return known.value[known.key.index(value)]
81
+ except:
82
+ return _index(known, input, value)
83
+
84
+ return value
85
+
86
+ def _transform(known, input, value):
87
+ if _is_array(value):
88
+ output = []
89
+ for val in value:
90
+ output.append(_relate(known, input, val))
91
+ return output
92
+
93
+ if _is_object(value):
94
+ obj = {}
95
+ for key in value:
96
+ obj[key] = _relate(known, input, value[key])
97
+ return obj
98
+
99
+ return value
100
+
101
+ def _wrap(value):
102
+ if _is_string(value):
103
+ return _String(value)
104
+
105
+ if _is_array(value):
106
+ i = 0
107
+ for val in value:
108
+ value[i] = _wrap(val)
109
+ i += 1
110
+
111
+ elif _is_object(value):
112
+ for key in value:
113
+ value[key] = _wrap(value[key])
114
+
115
+ return value
116
+
117
+ def parse(value, *args, **kwargs):
118
+ json = _json.loads(value, *args, **kwargs)
119
+ wrapped = []
120
+ for value in json:
121
+ wrapped.append(_wrap(value))
122
+
123
+ input = []
124
+ for value in wrapped:
125
+ if isinstance(value, _String):
126
+ input.append(value.value)
127
+ else:
128
+ input.append(value)
129
+
130
+ value = input[0]
131
+
132
+ if _is_array(value):
133
+ return _loop(_array_keys(value), input, [value], value)
134
+
135
+ if _is_object(value):
136
+ return _loop(_object_keys(value), input, [value], value)
137
+
138
+ return value
139
+
140
+
141
+ def stringify(value, *args, **kwargs):
142
+ known = _Known()
143
+ input = []
144
+ output = []
145
+ i = int(_index(known, input, value))
146
+ while i < len(input):
147
+ output.append(_transform(known, input, input[i]))
148
+ i += 1
149
+ return _json.dumps(output, *args, **kwargs)
@@ -0,0 +1,63 @@
1
+ from flatted import stringify as _stringify, parse
2
+
3
+ def stringify(value):
4
+ return _stringify(value, separators=(',', ':'))
5
+
6
+ assert stringify([None, None]) == '[[null,null]]'
7
+
8
+ a = []
9
+ o = {}
10
+
11
+ assert stringify(a) == '[[]]'
12
+ assert stringify(o) == '[{}]'
13
+
14
+ a.append(a)
15
+ o['o'] = o
16
+
17
+ assert stringify(a) == '[["0"]]'
18
+ assert stringify(o) == '[{"o":"0"}]'
19
+
20
+ b = parse(stringify(a))
21
+ assert isinstance(b, list) and b[0] == b
22
+
23
+ a.append(1)
24
+ a.append('two')
25
+ a.append(True)
26
+ o['one'] = 1
27
+ o['two'] = 'two'
28
+ o['three'] = True
29
+
30
+ assert stringify(a) == '[["0",1,"1",true],"two"]'
31
+ assert stringify(o) == '[{"o":"0","one":1,"two":"1","three":true},"two"]'
32
+
33
+ a.append(o)
34
+ o['a'] = a
35
+
36
+ assert stringify(a) == '[["0",1,"1",true,"2"],"two",{"o":"2","one":1,"two":"1","three":true,"a":"0"}]'
37
+ assert stringify(o) == '[{"o":"0","one":1,"two":"1","three":true,"a":"2"},"two",["2",1,"1",true,"0"]]'
38
+
39
+ a.append({'test': 'OK'})
40
+ a.append([1, 2, 3])
41
+
42
+ o['test'] = {'test': 'OK'}
43
+ o['array'] = [1, 2, 3]
44
+
45
+ assert stringify(a) == '[["0",1,"1",true,"2","3","4"],"two",{"o":"2","one":1,"two":"1","three":true,"a":"0","test":"3","array":"4"},{"test":"5"},[1,2,3],"OK"]'
46
+ assert stringify(o) == '[{"o":"0","one":1,"two":"1","three":true,"a":"2","test":"3","array":"4"},"two",["2",1,"1",true,"0","3","4"],{"test":"5"},[1,2,3],"OK"]'
47
+
48
+ a2 = parse(stringify(a));
49
+ o2 = parse(stringify(o));
50
+
51
+ assert a2[0] == a2
52
+ assert o2['o'] == o2
53
+
54
+ assert a2[1] == 1 and a2[2] == 'two' and a2[3] == True and isinstance(a2[4], dict)
55
+ assert a2[4] == a2[4]['o'] and a2 == a2[4]['o']['a']
56
+
57
+ str = parse('[{"prop":"1","a":"2","b":"3"},{"value":123},["4","5"],{"e":"6","t":"7","p":4},{},{"b":"8"},"f",{"a":"9"},["10"],"sup",{"a":1,"d":2,"c":"7","z":"11","h":1},{"g":2,"a":"7","b":"12","f":6},{"r":4,"u":"7","c":5}]')
58
+ assert str['b']['t']['a'] == 'sup' and str['a'][1]['b'][0]['c'] == str['b']['t']
59
+
60
+ oo = parse('[{"a":"1","b":"0","c":"2"},{"aa":"3"},{"ca":"4","cb":"5","cc":"6","cd":"7","ce":"8","cf":"9"},{"aaa":"10"},{"caa":"4"},{"cba":"5"},{"cca":"2"},{"cda":"4"},"value2","value3","value1"]');
61
+ assert oo['a']['aa']['aaa'] == 'value1' and oo == oo['b'] and oo['c']['ca']['caa'] == oo['c']['ca']
62
+
63
+ print('OK')
@@ -4,7 +4,7 @@ import re
4
4
  import sys
5
5
  from copy import copy
6
6
  from logging import getLogger
7
- from typing import Any, Literal, Tuple, TypedDict, cast
7
+ from typing import Any, Literal, Optional, Tuple, TypedDict, cast
8
8
 
9
9
  from .util.tracker import HttpxTimeTracker
10
10
 
@@ -204,7 +204,7 @@ class AnthropicAPI(ModelAPI):
204
204
  tools_param,
205
205
  messages,
206
206
  computer_use,
207
- ) = await resolve_chat_input(self.model_name, input, tools, config)
207
+ ) = await self.resolve_chat_input(input, tools, config)
208
208
 
209
209
  # prepare request params (assembled this way so we can log the raw model call)
210
210
  request = dict(messages=messages)
@@ -225,7 +225,7 @@ class AnthropicAPI(ModelAPI):
225
225
  # extra headers (for time tracker and computer use)
226
226
  extra_headers = headers | {HttpxTimeTracker.REQUEST_ID_HEADER: request_id}
227
227
  if computer_use:
228
- betas.append("computer-use-2024-10-22")
228
+ betas.append("computer-use-2025-01-24")
229
229
  if len(betas) > 0:
230
230
  extra_headers["anthropic-beta"] = ",".join(betas)
231
231
 
@@ -326,6 +326,9 @@ class AnthropicAPI(ModelAPI):
326
326
  def is_claude_3_5(self) -> bool:
327
327
  return "claude-3-5-" in self.model_name
328
328
 
329
+ def is_claude_3_7(self) -> bool:
330
+ return "claude-3-7-" in self.model_name
331
+
329
332
  @override
330
333
  def connection_key(self) -> str:
331
334
  return str(self.api_key)
@@ -397,6 +400,148 @@ class AnthropicAPI(ModelAPI):
397
400
  else:
398
401
  return ex
399
402
 
403
+ async def resolve_chat_input(
404
+ self,
405
+ input: list[ChatMessage],
406
+ tools: list[ToolInfo],
407
+ config: GenerateConfig,
408
+ ) -> Tuple[
409
+ list[TextBlockParam] | None, list["ToolParamDef"], list[MessageParam], bool
410
+ ]:
411
+ # extract system message
412
+ system_messages, messages = split_system_messages(input, config)
413
+
414
+ # messages
415
+ message_params = [(await message_param(message)) for message in messages]
416
+
417
+ # collapse user messages (as Inspect 'tool' messages become Claude 'user' messages)
418
+ message_params = functools.reduce(
419
+ consecutive_user_message_reducer, message_params, []
420
+ )
421
+
422
+ # tools
423
+ tools_params, computer_use = self.tool_params_for_tools(tools, config)
424
+
425
+ # system messages
426
+ if len(system_messages) > 0:
427
+ system_param: list[TextBlockParam] | None = [
428
+ TextBlockParam(type="text", text=message.text)
429
+ for message in system_messages
430
+ ]
431
+ else:
432
+ system_param = None
433
+
434
+ # add caching directives if necessary
435
+ cache_prompt = (
436
+ config.cache_prompt
437
+ if isinstance(config.cache_prompt, bool)
438
+ else True
439
+ if len(tools_params)
440
+ else False
441
+ )
442
+
443
+ # only certain claude models qualify
444
+ if cache_prompt:
445
+ if (
446
+ "claude-3-sonnet" in self.model_name
447
+ or "claude-2" in self.model_name
448
+ or "claude-instant" in self.model_name
449
+ ):
450
+ cache_prompt = False
451
+
452
+ if cache_prompt:
453
+ # system
454
+ if system_param:
455
+ add_cache_control(system_param[-1])
456
+ # tools
457
+ if tools_params:
458
+ add_cache_control(tools_params[-1])
459
+ # last 2 user messages
460
+ user_message_params = list(
461
+ filter(lambda m: m["role"] == "user", reversed(message_params))
462
+ )
463
+ for message in user_message_params[:2]:
464
+ if isinstance(message["content"], str):
465
+ text_param = TextBlockParam(type="text", text=message["content"])
466
+ add_cache_control(text_param)
467
+ message["content"] = [text_param]
468
+ else:
469
+ content = list(message["content"])
470
+ add_cache_control(cast(dict[str, Any], content[-1]))
471
+
472
+ # return chat input
473
+ return system_param, tools_params, message_params, computer_use
474
+
475
+ def tool_params_for_tools(
476
+ self, tools: list[ToolInfo], config: GenerateConfig
477
+ ) -> tuple[list["ToolParamDef"], bool]:
478
+ # tool params and computer_use bit to return
479
+ tool_params: list["ToolParamDef"] = []
480
+ computer_use = False
481
+
482
+ # for each tool, check if it has a native computer use implementation and use that
483
+ # when available (noting that we need to set the computer use request header)
484
+ for tool in tools:
485
+ computer_use_tool = (
486
+ self.computer_use_tool_param(tool)
487
+ if config.internal_tools is not False
488
+ else None
489
+ )
490
+ if computer_use_tool:
491
+ tool_params.append(computer_use_tool)
492
+ computer_use = True
493
+ else:
494
+ tool_params.append(
495
+ ToolParam(
496
+ name=tool.name,
497
+ description=tool.description,
498
+ input_schema=tool.parameters.model_dump(exclude_none=True),
499
+ )
500
+ )
501
+
502
+ return tool_params, computer_use
503
+
504
+ def computer_use_tool_param(
505
+ self, tool: ToolInfo
506
+ ) -> Optional["ComputerUseToolParam"]:
507
+ # check for compatible 'computer' tool
508
+ if tool.name == "computer" and (
509
+ sorted(tool.parameters.properties.keys())
510
+ == sorted(
511
+ [
512
+ "action",
513
+ "coordinate",
514
+ "duration",
515
+ "scroll_amount",
516
+ "scroll_direction",
517
+ "start_coordinate",
518
+ "text",
519
+ ]
520
+ )
521
+ ):
522
+ if self.is_claude_3_5():
523
+ warn_once(
524
+ logger,
525
+ "Use of Anthropic's native computer use support is not enabled in Claude 3.5. Please use 3.7 or later to leverage the native support.",
526
+ )
527
+ return None
528
+ return ComputerUseToolParam(
529
+ type="computer_20250124",
530
+ name="computer",
531
+ # Note: The dimensions passed here for display_width_px and display_height_px should
532
+ # match the dimensions of screenshots returned by the tool.
533
+ # Those dimensions will always be one of the values in MAX_SCALING_TARGETS
534
+ # in _x11_client.py.
535
+ # TODO: enhance this code to calculate the dimensions based on the scaled screen
536
+ # size used by the container.
537
+ display_width_px=1366,
538
+ display_height_px=768,
539
+ display_number=1,
540
+ )
541
+ # not a computer_use tool
542
+ else:
543
+ return None
544
+
400
545
 
401
546
  # native anthropic tool definitions for computer use beta
402
547
  # https://docs.anthropic.com/en/docs/build-with-claude/computer-use
@@ -412,131 +557,6 @@ class ComputerUseToolParam(TypedDict):
412
557
  ToolParamDef = ToolParam | ComputerUseToolParam
413
558
 
414
559
 
415
- async def resolve_chat_input(
416
- model: str,
417
- input: list[ChatMessage],
418
- tools: list[ToolInfo],
419
- config: GenerateConfig,
420
- ) -> Tuple[list[TextBlockParam] | None, list[ToolParamDef], list[MessageParam], bool]:
421
- # extract system message
422
- system_messages, messages = split_system_messages(input, config)
423
-
424
- # messages
425
- message_params = [(await message_param(message)) for message in messages]
426
-
427
- # collapse user messages (as Inspect 'tool' messages become Claude 'user' messages)
428
- message_params = functools.reduce(
429
- consecutive_user_message_reducer, message_params, []
430
- )
431
-
432
- # tools
433
- tools_params, computer_use = tool_params_for_tools(tools, config)
434
-
435
- # system messages
436
- if len(system_messages) > 0:
437
- system_param: list[TextBlockParam] | None = [
438
- TextBlockParam(type="text", text=message.text)
439
- for message in system_messages
440
- ]
441
- else:
442
- system_param = None
443
-
444
- # add caching directives if necessary
445
- cache_prompt = (
446
- config.cache_prompt
447
- if isinstance(config.cache_prompt, bool)
448
- else True
449
- if len(tools_params)
450
- else False
451
- )
452
-
453
- # only certain claude models qualify
454
- if cache_prompt:
455
- if (
456
- "claude-3-sonnet" in model
457
- or "claude-2" in model
458
- or "claude-instant" in model
459
- ):
460
- cache_prompt = False
461
-
462
- if cache_prompt:
463
- # system
464
- if system_param:
465
- add_cache_control(system_param[-1])
466
- # tools
467
- if tools_params:
468
- add_cache_control(tools_params[-1])
469
- # last 2 user messages
470
- user_message_params = list(
471
- filter(lambda m: m["role"] == "user", reversed(message_params))
472
- )
473
- for message in user_message_params[:2]:
474
- if isinstance(message["content"], str):
475
- text_param = TextBlockParam(type="text", text=message["content"])
476
- add_cache_control(text_param)
477
- message["content"] = [text_param]
478
- else:
479
- content = list(message["content"])
480
- add_cache_control(cast(dict[str, Any], content[-1]))
481
-
482
- # return chat input
483
- return system_param, tools_params, message_params, computer_use
484
-
485
-
486
- def tool_params_for_tools(
487
- tools: list[ToolInfo], config: GenerateConfig
488
- ) -> tuple[list[ToolParamDef], bool]:
489
- # tool params and computer_use bit to return
490
- tool_params: list[ToolParamDef] = []
491
- computer_use = False
492
-
493
- # for each tool, check if it has a native computer use implementation and use that
494
- # when available (noting that we need to set the computer use request header)
495
- for tool in tools:
496
- computer_use_tool = (
497
- computer_use_tool_param(tool)
498
- if config.internal_tools is not False
499
- else None
500
- )
501
- if computer_use_tool:
502
- tool_params.append(computer_use_tool)
503
- computer_use = True
504
- else:
505
- tool_params.append(
506
- ToolParam(
507
- name=tool.name,
508
- description=tool.description,
509
- input_schema=tool.parameters.model_dump(exclude_none=True),
510
- )
511
- )
512
-
513
- return tool_params, computer_use
514
-
515
-
516
- def computer_use_tool_param(tool: ToolInfo) -> ComputerUseToolParam | None:
517
- # check for compatible 'computer' tool
518
- if tool.name == "computer" and (
519
- sorted(tool.parameters.properties.keys())
520
- == sorted(["action", "coordinate", "text"])
521
- ):
522
- return ComputerUseToolParam(
523
- type="computer_20241022",
524
- name="computer",
525
- # Note: The dimensions passed here for display_width_px and display_height_px should
526
- # match the dimensions of screenshots returned by the tool.
527
- # Those dimensions will always be one of the values in MAX_SCALING_TARGETS
528
- # in _x11_client.py.
529
- # TODO: enhance this code to calculate the dimensions based on the scaled screen
530
- # size used by the container.
531
- display_width_px=1366,
532
- display_height_px=768,
533
- display_number=1,
534
- )
535
- # not a computer_use tool
536
- else:
537
- return None
538
-
539
-
540
560
  def add_cache_control(
541
561
  param: TextBlockParam | ToolParam | ComputerUseToolParam | dict[str, Any],
542
562
  ) -> None: