inspect-ai 0.3.92__py3-none-any.whl → 0.3.94__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +27 -0
- inspect_ai/_display/textual/widgets/samples.py +3 -3
- inspect_ai/_display/textual/widgets/transcript.py +3 -29
- inspect_ai/_eval/eval.py +19 -2
- inspect_ai/_eval/evalset.py +4 -1
- inspect_ai/_eval/run.py +41 -0
- inspect_ai/_eval/task/generate.py +38 -44
- inspect_ai/_eval/task/log.py +26 -28
- inspect_ai/_eval/task/run.py +23 -27
- inspect_ai/_util/answer.py +26 -0
- inspect_ai/_util/constants.py +0 -1
- inspect_ai/_util/local_server.py +398 -0
- inspect_ai/_util/working.py +10 -4
- inspect_ai/_view/www/dist/assets/index.css +173 -159
- inspect_ai/_view/www/dist/assets/index.js +1417 -1142
- inspect_ai/_view/www/log-schema.json +379 -3
- inspect_ai/_view/www/package.json +1 -1
- inspect_ai/_view/www/src/@types/log.d.ts +93 -14
- inspect_ai/_view/www/src/app/content/MetaDataGrid.tsx +2 -2
- inspect_ai/_view/www/src/app/content/MetaDataView.module.css +1 -1
- inspect_ai/_view/www/src/app/content/MetadataGrid.module.css +1 -1
- inspect_ai/_view/www/src/app/content/RenderedContent.tsx +1 -1
- inspect_ai/_view/www/src/app/log-view/LogView.tsx +11 -0
- inspect_ai/_view/www/src/app/log-view/tabs/InfoTab.tsx +2 -9
- inspect_ai/_view/www/src/app/log-view/tabs/ModelsTab.tsx +51 -0
- inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.module.css +6 -0
- inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.tsx +143 -0
- inspect_ai/_view/www/src/app/plan/ModelCard.tsx +1 -2
- inspect_ai/_view/www/src/app/plan/PlanCard.tsx +29 -7
- inspect_ai/_view/www/src/app/plan/PlanDetailView.module.css +1 -1
- inspect_ai/_view/www/src/app/plan/PlanDetailView.tsx +1 -198
- inspect_ai/_view/www/src/app/samples/descriptor/score/NumericScoreDescriptor.tsx +2 -1
- inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.module.css +2 -1
- inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +174 -0
- inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +8 -8
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptView.tsx +12 -2
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +0 -3
- inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +87 -25
- inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +229 -17
- inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +11 -0
- inspect_ai/_view/www/src/app/samples/transcript/types.ts +5 -1
- inspect_ai/_view/www/src/app/usage/ModelUsagePanel.tsx +3 -2
- inspect_ai/_view/www/src/app/usage/TokenTable.module.css +4 -1
- inspect_ai/_view/www/src/app/usage/TokenTable.tsx +2 -2
- inspect_ai/_view/www/src/app/usage/UsageCard.module.css +8 -3
- inspect_ai/_view/www/src/app/usage/UsageCard.tsx +1 -35
- inspect_ai/_view/www/src/components/Card.css +0 -1
- inspect_ai/_view/www/src/constants.ts +2 -0
- inspect_ai/_view/www/src/utils/numeric.ts +17 -0
- inspect_ai/agent/_agent.py +3 -3
- inspect_ai/agent/_as_solver.py +22 -12
- inspect_ai/agent/_as_tool.py +20 -6
- inspect_ai/agent/_handoff.py +12 -1
- inspect_ai/agent/_react.py +4 -3
- inspect_ai/agent/_run.py +16 -3
- inspect_ai/agent/_types.py +9 -0
- inspect_ai/dataset/_dataset.py +6 -3
- inspect_ai/log/__init__.py +14 -0
- inspect_ai/log/_convert.py +4 -9
- inspect_ai/log/_file.py +56 -0
- inspect_ai/log/_log.py +99 -0
- inspect_ai/log/_recorders/__init__.py +2 -0
- inspect_ai/log/_recorders/buffer/database.py +12 -11
- inspect_ai/log/_recorders/buffer/filestore.py +2 -2
- inspect_ai/log/_recorders/buffer/types.py +2 -2
- inspect_ai/log/_recorders/eval.py +20 -65
- inspect_ai/log/_recorders/file.py +28 -6
- inspect_ai/log/_recorders/recorder.py +7 -0
- inspect_ai/log/_recorders/types.py +1 -23
- inspect_ai/log/_samples.py +14 -25
- inspect_ai/log/_transcript.py +84 -36
- inspect_ai/log/_tree.py +118 -0
- inspect_ai/log/_util.py +52 -0
- inspect_ai/model/__init__.py +5 -1
- inspect_ai/model/_call_tools.py +72 -44
- inspect_ai/model/_generate_config.py +14 -8
- inspect_ai/model/_model.py +66 -88
- inspect_ai/model/_model_output.py +25 -0
- inspect_ai/model/_openai.py +2 -0
- inspect_ai/model/_providers/anthropic.py +13 -23
- inspect_ai/model/_providers/hf.py +27 -1
- inspect_ai/model/_providers/openai_o1.py +8 -2
- inspect_ai/model/_providers/providers.py +18 -4
- inspect_ai/model/_providers/sglang.py +247 -0
- inspect_ai/model/_providers/vllm.py +211 -400
- inspect_ai/scorer/_choice.py +1 -2
- inspect_ai/solver/__init__.py +7 -2
- inspect_ai/solver/_basic_agent.py +3 -10
- inspect_ai/solver/_chain.py +1 -1
- inspect_ai/solver/_fork.py +1 -1
- inspect_ai/solver/_multiple_choice.py +5 -22
- inspect_ai/solver/_plan.py +2 -2
- inspect_ai/solver/_task_state.py +26 -88
- inspect_ai/solver/_transcript.py +6 -7
- inspect_ai/tool/_json_rpc_helpers.py +45 -17
- inspect_ai/tool/_mcp/_mcp.py +8 -5
- inspect_ai/tool/_mcp/_sandbox.py +8 -2
- inspect_ai/tool/_mcp/server.py +3 -1
- inspect_ai/tool/_tool_call.py +4 -1
- inspect_ai/tool/_tool_support_helpers.py +51 -12
- inspect_ai/tool/_tools/_bash_session.py +190 -68
- inspect_ai/tool/_tools/_computer/_computer.py +25 -1
- inspect_ai/tool/_tools/_execute.py +4 -1
- inspect_ai/tool/_tools/_text_editor.py +4 -3
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +10 -3
- inspect_ai/util/__init__.py +16 -0
- inspect_ai/util/_anyio.py +11 -0
- inspect_ai/util/_collect.py +50 -0
- inspect_ai/util/_limit.py +393 -0
- inspect_ai/util/_limited_conversation.py +57 -0
- inspect_ai/util/_span.py +58 -0
- inspect_ai/util/_subtask.py +27 -42
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/RECORD +120 -134
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/WHEEL +1 -1
- inspect_ai/_display/core/group.py +0 -79
- inspect_ai/solver/_limit.py +0 -39
- inspect_ai/tool/_tools/_computer/_resources/Dockerfile +0 -102
- inspect_ai/tool/_tools/_computer/_resources/README.md +0 -30
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/entrypoint.sh +0 -18
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/novnc_startup.sh +0 -20
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -48
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/xfce_startup.sh +0 -13
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/xvfb_startup.sh +0 -48
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -9
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -61
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -10
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml +0 -91
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -10
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -10
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -10
- inspect_ai/tool/_tools/_computer/_resources/tool/.pylintrc +0 -8
- inspect_ai/tool/_tools/_computer/_resources/tool/.vscode/settings.json +0 -12
- inspect_ai/tool/_tools/_computer/_resources/tool/_args.py +0 -78
- inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +0 -22
- inspect_ai/tool/_tools/_computer/_resources/tool/_logger.py +0 -22
- inspect_ai/tool/_tools/_computer/_resources/tool/_run.py +0 -42
- inspect_ai/tool/_tools/_computer/_resources/tool/_tool_result.py +0 -33
- inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +0 -341
- inspect_ai/tool/_tools/_computer/_resources/tool/computer_tool.py +0 -141
- inspect_ai/tool/_tools/_computer/_resources/tool/pyproject.toml +0 -65
- inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/tool/_tools/_computer/test_args.py +0 -151
- /inspect_ai/{tool/_tools/_computer/_resources/tool/__init__.py → _view/www/src/app/log-view/tabs/ModelsTab.module.css} +0 -0
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/top_level.txt +0 -0
@@ -1,65 +0,0 @@
|
|
1
|
-
[build-system]
|
2
|
-
requires = ["setuptools>=64", "setuptools_scm[toml]>=8"]
|
3
|
-
build-backend = "setuptools.build_meta"
|
4
|
-
|
5
|
-
[tool.setuptools_scm]
|
6
|
-
|
7
|
-
[tool.setuptools.packages.find]
|
8
|
-
where = ["."]
|
9
|
-
include = ["inspect_ai*"]
|
10
|
-
|
11
|
-
[tool.ruff]
|
12
|
-
src = ["."]
|
13
|
-
|
14
|
-
[tool.ruff.lint]
|
15
|
-
select = [
|
16
|
-
"E", # pycodestyle errors
|
17
|
-
"W", # pycodestyle warnings
|
18
|
-
"F", # flake8
|
19
|
-
"D", # pydocstyle
|
20
|
-
"I", # isort
|
21
|
-
"SIM101", # duplicate isinstance
|
22
|
-
"UP038", # non-pep604-isinstance
|
23
|
-
# "RET", # flake8-return
|
24
|
-
# "RUF", # ruff rules
|
25
|
-
]
|
26
|
-
ignore = ["E203", "E501", "D10", "D212", "D415"]
|
27
|
-
|
28
|
-
[tool.ruff.lint.pydocstyle]
|
29
|
-
convention = "google"
|
30
|
-
|
31
|
-
[tool.pytest.ini_options]
|
32
|
-
minversion = "7.0"
|
33
|
-
addopts = "-rA --doctest-modules --color=yes"
|
34
|
-
doctest_optionflags = ["NORMALIZE_WHITESPACE", "IGNORE_EXCEPTION_DETAIL"]
|
35
|
-
asyncio_mode = "auto"
|
36
|
-
asyncio_default_fixture_loop_scope = "function"
|
37
|
-
log_level = "warning"
|
38
|
-
|
39
|
-
[tool.mypy]
|
40
|
-
warn_unused_ignores = true
|
41
|
-
no_implicit_reexport = true
|
42
|
-
strict_equality = true
|
43
|
-
warn_redundant_casts = true
|
44
|
-
warn_unused_configs = true
|
45
|
-
disallow_any_explicit = true
|
46
|
-
disallow_any_generics = true
|
47
|
-
disallow_subclassing_any = true
|
48
|
-
plugins=["pydantic.mypy"]
|
49
|
-
|
50
|
-
|
51
|
-
[tool.pydantic-mypy]
|
52
|
-
init_forbid_extra = true
|
53
|
-
init_typed = true
|
54
|
-
|
55
|
-
[tool.check-wheel-contents]
|
56
|
-
ignore = ["W002", "W009"]
|
57
|
-
|
58
|
-
[project]
|
59
|
-
name = "web_browser_tool_container"
|
60
|
-
requires-python = ">=3.10"
|
61
|
-
dynamic = ["version", "dependencies"]
|
62
|
-
|
63
|
-
|
64
|
-
[project.optional-dependencies]
|
65
|
-
dev = ["pytest"]
|
File without changes
|
@@ -1,151 +0,0 @@
|
|
1
|
-
import pytest
|
2
|
-
|
3
|
-
from ._resources.tool._args import parse_arguments
|
4
|
-
|
5
|
-
|
6
|
-
def test_parse_args_screenshot() -> None:
|
7
|
-
args = parse_arguments(["screenshot"])
|
8
|
-
assert args.action == "screenshot"
|
9
|
-
|
10
|
-
|
11
|
-
def test_parse_args_cursor_position() -> None:
|
12
|
-
args = parse_arguments(["cursor_position"])
|
13
|
-
assert args.action == "cursor_position"
|
14
|
-
|
15
|
-
|
16
|
-
def test_parse_args_type() -> None:
|
17
|
-
args = parse_arguments(["type", "--text", "hello"])
|
18
|
-
assert args.action == "type"
|
19
|
-
assert args.text == "hello"
|
20
|
-
|
21
|
-
|
22
|
-
def test_parse_args_mouse_move() -> None:
|
23
|
-
args = parse_arguments(["mouse_move", "--coordinate", "100", "200"])
|
24
|
-
assert args.action == "mouse_move"
|
25
|
-
assert args.coordinate == [100, 200]
|
26
|
-
|
27
|
-
|
28
|
-
def test_parse_args_left_click() -> None:
|
29
|
-
args = parse_arguments(["left_click", "--coordinate", "100", "200"])
|
30
|
-
assert args.action == "left_click"
|
31
|
-
assert args.coordinate == [100, 200]
|
32
|
-
|
33
|
-
|
34
|
-
def test_parse_args_right_click() -> None:
|
35
|
-
args = parse_arguments(["right_click", "--coordinate", "100", "200"])
|
36
|
-
assert args.action == "right_click"
|
37
|
-
assert args.coordinate == [100, 200]
|
38
|
-
|
39
|
-
|
40
|
-
def test_parse_args_middle_click() -> None:
|
41
|
-
args = parse_arguments(["middle_click", "--coordinate", "100", "200"])
|
42
|
-
assert args.action == "middle_click"
|
43
|
-
assert args.coordinate == [100, 200]
|
44
|
-
|
45
|
-
|
46
|
-
def test_parse_args_double_click() -> None:
|
47
|
-
args = parse_arguments(["double_click", "--coordinate", "100", "200"])
|
48
|
-
assert args.action == "double_click"
|
49
|
-
assert args.coordinate == [100, 200]
|
50
|
-
|
51
|
-
|
52
|
-
def test_parse_args_triple_click() -> None:
|
53
|
-
args = parse_arguments(["triple_click", "--coordinate", "100", "200"])
|
54
|
-
assert args.action == "triple_click"
|
55
|
-
assert args.coordinate == [100, 200]
|
56
|
-
|
57
|
-
|
58
|
-
def test_parse_args_hold_key() -> None:
|
59
|
-
args = parse_arguments(["hold_key", "--text", "a", "--duration", "5"])
|
60
|
-
assert args.action == "hold_key"
|
61
|
-
assert args.text == "a"
|
62
|
-
assert args.duration == 5
|
63
|
-
|
64
|
-
|
65
|
-
def test_parse_args_left_click_drag() -> None:
|
66
|
-
args = parse_arguments(
|
67
|
-
[
|
68
|
-
"left_click_drag",
|
69
|
-
"--start_coordinate",
|
70
|
-
"100",
|
71
|
-
"200",
|
72
|
-
"--coordinate",
|
73
|
-
"300",
|
74
|
-
"400",
|
75
|
-
"--text",
|
76
|
-
"drag",
|
77
|
-
]
|
78
|
-
)
|
79
|
-
assert args.action == "left_click_drag"
|
80
|
-
assert args.start_coordinate == [100, 200]
|
81
|
-
assert args.coordinate == [300, 400]
|
82
|
-
assert args.text == "drag"
|
83
|
-
|
84
|
-
|
85
|
-
def test_parse_args_scroll() -> None:
|
86
|
-
args = parse_arguments(
|
87
|
-
[
|
88
|
-
"scroll",
|
89
|
-
"--scroll_direction",
|
90
|
-
"up",
|
91
|
-
"--scroll_amount",
|
92
|
-
"10",
|
93
|
-
"--coordinate",
|
94
|
-
"100",
|
95
|
-
"200",
|
96
|
-
]
|
97
|
-
)
|
98
|
-
assert args.action == "scroll"
|
99
|
-
assert args.scroll_direction == "up"
|
100
|
-
assert args.scroll_amount == 10
|
101
|
-
assert args.coordinate == [100, 200]
|
102
|
-
|
103
|
-
|
104
|
-
def test_parse_args_wait() -> None:
|
105
|
-
args = parse_arguments(["wait", "--duration", "5"])
|
106
|
-
assert args.action == "wait"
|
107
|
-
assert args.duration == 5
|
108
|
-
|
109
|
-
|
110
|
-
def test_parse_args_type_missing_text() -> None:
|
111
|
-
with pytest.raises(SystemExit):
|
112
|
-
parse_arguments(["type"])
|
113
|
-
|
114
|
-
|
115
|
-
def test_parse_args_invalid_action() -> None:
|
116
|
-
with pytest.raises(SystemExit):
|
117
|
-
parse_arguments(["invalid_action"])
|
118
|
-
|
119
|
-
|
120
|
-
def test_parse_args_mouse_move_missing_coordinate() -> None:
|
121
|
-
with pytest.raises(SystemExit):
|
122
|
-
parse_arguments(["mouse_move"])
|
123
|
-
|
124
|
-
|
125
|
-
def test_parse_args_click_invalid_coordinate() -> None:
|
126
|
-
with pytest.raises(SystemExit):
|
127
|
-
parse_arguments(["left_click", "--coordinate", "100"])
|
128
|
-
|
129
|
-
|
130
|
-
def test_parse_args_hold_key_missing_duration() -> None:
|
131
|
-
with pytest.raises(SystemExit):
|
132
|
-
parse_arguments(["hold_key", "--text", "a"])
|
133
|
-
|
134
|
-
|
135
|
-
def test_parse_args_left_click_drag_missing_start_coordinate() -> None:
|
136
|
-
with pytest.raises(SystemExit):
|
137
|
-
parse_arguments(
|
138
|
-
["left_click_drag", "--coordinate", "300", "400", "--text", "drag"]
|
139
|
-
)
|
140
|
-
|
141
|
-
|
142
|
-
def test_parse_args_scroll_missing_scroll_direction() -> None:
|
143
|
-
with pytest.raises(SystemExit):
|
144
|
-
parse_arguments(
|
145
|
-
["scroll", "--scroll_amount", "10", "--coordinate", "100", "200"]
|
146
|
-
)
|
147
|
-
|
148
|
-
|
149
|
-
def test_parse_args_wait_missing_duration() -> None:
|
150
|
-
with pytest.raises(SystemExit):
|
151
|
-
parse_arguments(["wait"])
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|