inspect-ai 0.3.92__py3-none-any.whl → 0.3.94__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. inspect_ai/_cli/eval.py +27 -0
  2. inspect_ai/_display/textual/widgets/samples.py +3 -3
  3. inspect_ai/_display/textual/widgets/transcript.py +3 -29
  4. inspect_ai/_eval/eval.py +19 -2
  5. inspect_ai/_eval/evalset.py +4 -1
  6. inspect_ai/_eval/run.py +41 -0
  7. inspect_ai/_eval/task/generate.py +38 -44
  8. inspect_ai/_eval/task/log.py +26 -28
  9. inspect_ai/_eval/task/run.py +23 -27
  10. inspect_ai/_util/answer.py +26 -0
  11. inspect_ai/_util/constants.py +0 -1
  12. inspect_ai/_util/local_server.py +398 -0
  13. inspect_ai/_util/working.py +10 -4
  14. inspect_ai/_view/www/dist/assets/index.css +173 -159
  15. inspect_ai/_view/www/dist/assets/index.js +1417 -1142
  16. inspect_ai/_view/www/log-schema.json +379 -3
  17. inspect_ai/_view/www/package.json +1 -1
  18. inspect_ai/_view/www/src/@types/log.d.ts +93 -14
  19. inspect_ai/_view/www/src/app/content/MetaDataGrid.tsx +2 -2
  20. inspect_ai/_view/www/src/app/content/MetaDataView.module.css +1 -1
  21. inspect_ai/_view/www/src/app/content/MetadataGrid.module.css +1 -1
  22. inspect_ai/_view/www/src/app/content/RenderedContent.tsx +1 -1
  23. inspect_ai/_view/www/src/app/log-view/LogView.tsx +11 -0
  24. inspect_ai/_view/www/src/app/log-view/tabs/InfoTab.tsx +2 -9
  25. inspect_ai/_view/www/src/app/log-view/tabs/ModelsTab.tsx +51 -0
  26. inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.module.css +6 -0
  27. inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.tsx +143 -0
  28. inspect_ai/_view/www/src/app/plan/ModelCard.tsx +1 -2
  29. inspect_ai/_view/www/src/app/plan/PlanCard.tsx +29 -7
  30. inspect_ai/_view/www/src/app/plan/PlanDetailView.module.css +1 -1
  31. inspect_ai/_view/www/src/app/plan/PlanDetailView.tsx +1 -198
  32. inspect_ai/_view/www/src/app/samples/descriptor/score/NumericScoreDescriptor.tsx +2 -1
  33. inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.module.css +2 -1
  34. inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +174 -0
  35. inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +8 -8
  36. inspect_ai/_view/www/src/app/samples/transcript/TranscriptView.tsx +12 -2
  37. inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +1 -1
  38. inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +0 -3
  39. inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +87 -25
  40. inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +229 -17
  41. inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +11 -0
  42. inspect_ai/_view/www/src/app/samples/transcript/types.ts +5 -1
  43. inspect_ai/_view/www/src/app/usage/ModelUsagePanel.tsx +3 -2
  44. inspect_ai/_view/www/src/app/usage/TokenTable.module.css +4 -1
  45. inspect_ai/_view/www/src/app/usage/TokenTable.tsx +2 -2
  46. inspect_ai/_view/www/src/app/usage/UsageCard.module.css +8 -3
  47. inspect_ai/_view/www/src/app/usage/UsageCard.tsx +1 -35
  48. inspect_ai/_view/www/src/components/Card.css +0 -1
  49. inspect_ai/_view/www/src/constants.ts +2 -0
  50. inspect_ai/_view/www/src/utils/numeric.ts +17 -0
  51. inspect_ai/agent/_agent.py +3 -3
  52. inspect_ai/agent/_as_solver.py +22 -12
  53. inspect_ai/agent/_as_tool.py +20 -6
  54. inspect_ai/agent/_handoff.py +12 -1
  55. inspect_ai/agent/_react.py +4 -3
  56. inspect_ai/agent/_run.py +16 -3
  57. inspect_ai/agent/_types.py +9 -0
  58. inspect_ai/dataset/_dataset.py +6 -3
  59. inspect_ai/log/__init__.py +14 -0
  60. inspect_ai/log/_convert.py +4 -9
  61. inspect_ai/log/_file.py +56 -0
  62. inspect_ai/log/_log.py +99 -0
  63. inspect_ai/log/_recorders/__init__.py +2 -0
  64. inspect_ai/log/_recorders/buffer/database.py +12 -11
  65. inspect_ai/log/_recorders/buffer/filestore.py +2 -2
  66. inspect_ai/log/_recorders/buffer/types.py +2 -2
  67. inspect_ai/log/_recorders/eval.py +20 -65
  68. inspect_ai/log/_recorders/file.py +28 -6
  69. inspect_ai/log/_recorders/recorder.py +7 -0
  70. inspect_ai/log/_recorders/types.py +1 -23
  71. inspect_ai/log/_samples.py +14 -25
  72. inspect_ai/log/_transcript.py +84 -36
  73. inspect_ai/log/_tree.py +118 -0
  74. inspect_ai/log/_util.py +52 -0
  75. inspect_ai/model/__init__.py +5 -1
  76. inspect_ai/model/_call_tools.py +72 -44
  77. inspect_ai/model/_generate_config.py +14 -8
  78. inspect_ai/model/_model.py +66 -88
  79. inspect_ai/model/_model_output.py +25 -0
  80. inspect_ai/model/_openai.py +2 -0
  81. inspect_ai/model/_providers/anthropic.py +13 -23
  82. inspect_ai/model/_providers/hf.py +27 -1
  83. inspect_ai/model/_providers/openai_o1.py +8 -2
  84. inspect_ai/model/_providers/providers.py +18 -4
  85. inspect_ai/model/_providers/sglang.py +247 -0
  86. inspect_ai/model/_providers/vllm.py +211 -400
  87. inspect_ai/scorer/_choice.py +1 -2
  88. inspect_ai/solver/__init__.py +7 -2
  89. inspect_ai/solver/_basic_agent.py +3 -10
  90. inspect_ai/solver/_chain.py +1 -1
  91. inspect_ai/solver/_fork.py +1 -1
  92. inspect_ai/solver/_multiple_choice.py +5 -22
  93. inspect_ai/solver/_plan.py +2 -2
  94. inspect_ai/solver/_task_state.py +26 -88
  95. inspect_ai/solver/_transcript.py +6 -7
  96. inspect_ai/tool/_json_rpc_helpers.py +45 -17
  97. inspect_ai/tool/_mcp/_mcp.py +8 -5
  98. inspect_ai/tool/_mcp/_sandbox.py +8 -2
  99. inspect_ai/tool/_mcp/server.py +3 -1
  100. inspect_ai/tool/_tool_call.py +4 -1
  101. inspect_ai/tool/_tool_support_helpers.py +51 -12
  102. inspect_ai/tool/_tools/_bash_session.py +190 -68
  103. inspect_ai/tool/_tools/_computer/_computer.py +25 -1
  104. inspect_ai/tool/_tools/_execute.py +4 -1
  105. inspect_ai/tool/_tools/_text_editor.py +4 -3
  106. inspect_ai/tool/_tools/_web_browser/_web_browser.py +10 -3
  107. inspect_ai/util/__init__.py +16 -0
  108. inspect_ai/util/_anyio.py +11 -0
  109. inspect_ai/util/_collect.py +50 -0
  110. inspect_ai/util/_limit.py +393 -0
  111. inspect_ai/util/_limited_conversation.py +57 -0
  112. inspect_ai/util/_span.py +58 -0
  113. inspect_ai/util/_subtask.py +27 -42
  114. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/METADATA +1 -1
  115. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/RECORD +120 -134
  116. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/WHEEL +1 -1
  117. inspect_ai/_display/core/group.py +0 -79
  118. inspect_ai/solver/_limit.py +0 -39
  119. inspect_ai/tool/_tools/_computer/_resources/Dockerfile +0 -102
  120. inspect_ai/tool/_tools/_computer/_resources/README.md +0 -30
  121. inspect_ai/tool/_tools/_computer/_resources/entrypoint/entrypoint.sh +0 -18
  122. inspect_ai/tool/_tools/_computer/_resources/entrypoint/novnc_startup.sh +0 -20
  123. inspect_ai/tool/_tools/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -48
  124. inspect_ai/tool/_tools/_computer/_resources/entrypoint/xfce_startup.sh +0 -13
  125. inspect_ai/tool/_tools/_computer/_resources/entrypoint/xvfb_startup.sh +0 -48
  126. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
  127. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -9
  128. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -61
  129. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -10
  130. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml +0 -91
  131. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -10
  132. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -10
  133. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -10
  134. inspect_ai/tool/_tools/_computer/_resources/tool/.pylintrc +0 -8
  135. inspect_ai/tool/_tools/_computer/_resources/tool/.vscode/settings.json +0 -12
  136. inspect_ai/tool/_tools/_computer/_resources/tool/_args.py +0 -78
  137. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +0 -22
  138. inspect_ai/tool/_tools/_computer/_resources/tool/_logger.py +0 -22
  139. inspect_ai/tool/_tools/_computer/_resources/tool/_run.py +0 -42
  140. inspect_ai/tool/_tools/_computer/_resources/tool/_tool_result.py +0 -33
  141. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +0 -341
  142. inspect_ai/tool/_tools/_computer/_resources/tool/computer_tool.py +0 -141
  143. inspect_ai/tool/_tools/_computer/_resources/tool/pyproject.toml +0 -65
  144. inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
  145. inspect_ai/tool/_tools/_computer/test_args.py +0 -151
  146. /inspect_ai/{tool/_tools/_computer/_resources/tool/__init__.py → _view/www/src/app/log-view/tabs/ModelsTab.module.css} +0 -0
  147. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/entry_points.txt +0 -0
  148. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/licenses/LICENSE +0 -0
  149. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/top_level.txt +0 -0
@@ -1,65 +0,0 @@
1
- [build-system]
2
- requires = ["setuptools>=64", "setuptools_scm[toml]>=8"]
3
- build-backend = "setuptools.build_meta"
4
-
5
- [tool.setuptools_scm]
6
-
7
- [tool.setuptools.packages.find]
8
- where = ["."]
9
- include = ["inspect_ai*"]
10
-
11
- [tool.ruff]
12
- src = ["."]
13
-
14
- [tool.ruff.lint]
15
- select = [
16
- "E", # pycodestyle errors
17
- "W", # pycodestyle warnings
18
- "F", # flake8
19
- "D", # pydocstyle
20
- "I", # isort
21
- "SIM101", # duplicate isinstance
22
- "UP038", # non-pep604-isinstance
23
- # "RET", # flake8-return
24
- # "RUF", # ruff rules
25
- ]
26
- ignore = ["E203", "E501", "D10", "D212", "D415"]
27
-
28
- [tool.ruff.lint.pydocstyle]
29
- convention = "google"
30
-
31
- [tool.pytest.ini_options]
32
- minversion = "7.0"
33
- addopts = "-rA --doctest-modules --color=yes"
34
- doctest_optionflags = ["NORMALIZE_WHITESPACE", "IGNORE_EXCEPTION_DETAIL"]
35
- asyncio_mode = "auto"
36
- asyncio_default_fixture_loop_scope = "function"
37
- log_level = "warning"
38
-
39
- [tool.mypy]
40
- warn_unused_ignores = true
41
- no_implicit_reexport = true
42
- strict_equality = true
43
- warn_redundant_casts = true
44
- warn_unused_configs = true
45
- disallow_any_explicit = true
46
- disallow_any_generics = true
47
- disallow_subclassing_any = true
48
- plugins=["pydantic.mypy"]
49
-
50
-
51
- [tool.pydantic-mypy]
52
- init_forbid_extra = true
53
- init_typed = true
54
-
55
- [tool.check-wheel-contents]
56
- ignore = ["W002", "W009"]
57
-
58
- [project]
59
- name = "web_browser_tool_container"
60
- requires-python = ">=3.10"
61
- dynamic = ["version", "dependencies"]
62
-
63
-
64
- [project.optional-dependencies]
65
- dev = ["pytest"]
@@ -1,151 +0,0 @@
1
- import pytest
2
-
3
- from ._resources.tool._args import parse_arguments
4
-
5
-
6
- def test_parse_args_screenshot() -> None:
7
- args = parse_arguments(["screenshot"])
8
- assert args.action == "screenshot"
9
-
10
-
11
- def test_parse_args_cursor_position() -> None:
12
- args = parse_arguments(["cursor_position"])
13
- assert args.action == "cursor_position"
14
-
15
-
16
- def test_parse_args_type() -> None:
17
- args = parse_arguments(["type", "--text", "hello"])
18
- assert args.action == "type"
19
- assert args.text == "hello"
20
-
21
-
22
- def test_parse_args_mouse_move() -> None:
23
- args = parse_arguments(["mouse_move", "--coordinate", "100", "200"])
24
- assert args.action == "mouse_move"
25
- assert args.coordinate == [100, 200]
26
-
27
-
28
- def test_parse_args_left_click() -> None:
29
- args = parse_arguments(["left_click", "--coordinate", "100", "200"])
30
- assert args.action == "left_click"
31
- assert args.coordinate == [100, 200]
32
-
33
-
34
- def test_parse_args_right_click() -> None:
35
- args = parse_arguments(["right_click", "--coordinate", "100", "200"])
36
- assert args.action == "right_click"
37
- assert args.coordinate == [100, 200]
38
-
39
-
40
- def test_parse_args_middle_click() -> None:
41
- args = parse_arguments(["middle_click", "--coordinate", "100", "200"])
42
- assert args.action == "middle_click"
43
- assert args.coordinate == [100, 200]
44
-
45
-
46
- def test_parse_args_double_click() -> None:
47
- args = parse_arguments(["double_click", "--coordinate", "100", "200"])
48
- assert args.action == "double_click"
49
- assert args.coordinate == [100, 200]
50
-
51
-
52
- def test_parse_args_triple_click() -> None:
53
- args = parse_arguments(["triple_click", "--coordinate", "100", "200"])
54
- assert args.action == "triple_click"
55
- assert args.coordinate == [100, 200]
56
-
57
-
58
- def test_parse_args_hold_key() -> None:
59
- args = parse_arguments(["hold_key", "--text", "a", "--duration", "5"])
60
- assert args.action == "hold_key"
61
- assert args.text == "a"
62
- assert args.duration == 5
63
-
64
-
65
- def test_parse_args_left_click_drag() -> None:
66
- args = parse_arguments(
67
- [
68
- "left_click_drag",
69
- "--start_coordinate",
70
- "100",
71
- "200",
72
- "--coordinate",
73
- "300",
74
- "400",
75
- "--text",
76
- "drag",
77
- ]
78
- )
79
- assert args.action == "left_click_drag"
80
- assert args.start_coordinate == [100, 200]
81
- assert args.coordinate == [300, 400]
82
- assert args.text == "drag"
83
-
84
-
85
- def test_parse_args_scroll() -> None:
86
- args = parse_arguments(
87
- [
88
- "scroll",
89
- "--scroll_direction",
90
- "up",
91
- "--scroll_amount",
92
- "10",
93
- "--coordinate",
94
- "100",
95
- "200",
96
- ]
97
- )
98
- assert args.action == "scroll"
99
- assert args.scroll_direction == "up"
100
- assert args.scroll_amount == 10
101
- assert args.coordinate == [100, 200]
102
-
103
-
104
- def test_parse_args_wait() -> None:
105
- args = parse_arguments(["wait", "--duration", "5"])
106
- assert args.action == "wait"
107
- assert args.duration == 5
108
-
109
-
110
- def test_parse_args_type_missing_text() -> None:
111
- with pytest.raises(SystemExit):
112
- parse_arguments(["type"])
113
-
114
-
115
- def test_parse_args_invalid_action() -> None:
116
- with pytest.raises(SystemExit):
117
- parse_arguments(["invalid_action"])
118
-
119
-
120
- def test_parse_args_mouse_move_missing_coordinate() -> None:
121
- with pytest.raises(SystemExit):
122
- parse_arguments(["mouse_move"])
123
-
124
-
125
- def test_parse_args_click_invalid_coordinate() -> None:
126
- with pytest.raises(SystemExit):
127
- parse_arguments(["left_click", "--coordinate", "100"])
128
-
129
-
130
- def test_parse_args_hold_key_missing_duration() -> None:
131
- with pytest.raises(SystemExit):
132
- parse_arguments(["hold_key", "--text", "a"])
133
-
134
-
135
- def test_parse_args_left_click_drag_missing_start_coordinate() -> None:
136
- with pytest.raises(SystemExit):
137
- parse_arguments(
138
- ["left_click_drag", "--coordinate", "300", "400", "--text", "drag"]
139
- )
140
-
141
-
142
- def test_parse_args_scroll_missing_scroll_direction() -> None:
143
- with pytest.raises(SystemExit):
144
- parse_arguments(
145
- ["scroll", "--scroll_amount", "10", "--coordinate", "100", "200"]
146
- )
147
-
148
-
149
- def test_parse_args_wait_missing_duration() -> None:
150
- with pytest.raises(SystemExit):
151
- parse_arguments(["wait"])