inspect-ai 0.3.75__py3-none-any.whl → 0.3.76__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_eval/evalset.py +3 -2
- inspect_ai/_eval/registry.py +3 -5
- inspect_ai/_eval/run.py +4 -0
- inspect_ai/_eval/task/run.py +4 -0
- inspect_ai/_util/logger.py +3 -0
- inspect_ai/_view/www/dist/assets/index.css +28 -16
- inspect_ai/_view/www/dist/assets/index.js +4801 -4615
- inspect_ai/_view/www/log-schema.json +79 -9
- inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +2 -2
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +1 -1
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +2 -2
- inspect_ai/_view/www/src/types/log.d.ts +11 -5
- inspect_ai/log/_recorders/json.py +8 -0
- inspect_ai/log/_transcript.py +13 -4
- inspect_ai/model/_call_tools.py +13 -4
- inspect_ai/model/_chat_message.py +3 -0
- inspect_ai/model/_model.py +5 -1
- inspect_ai/model/_model_output.py +6 -1
- inspect_ai/model/_openai.py +11 -6
- inspect_ai/model/_providers/anthropic.py +133 -75
- inspect_ai/model/_providers/openai.py +11 -8
- inspect_ai/model/_providers/vertex.py +5 -2
- inspect_ai/tool/__init__.py +4 -0
- inspect_ai/tool/_tool_call.py +5 -2
- inspect_ai/tool/_tool_support_helpers.py +200 -0
- inspect_ai/tool/_tools/_bash_session.py +119 -0
- inspect_ai/tool/_tools/_computer/_computer.py +1 -1
- inspect_ai/tool/_tools/_text_editor.py +121 -0
- inspect_ai/tool/_tools/_web_browser/_back_compat.py +150 -0
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +75 -130
- inspect_ai/tool/_tools/_web_search.py +1 -1
- inspect_ai/util/_json.py +28 -0
- inspect_ai/util/_sandbox/context.py +16 -7
- inspect_ai/util/_sandbox/docker/config.py +1 -1
- inspect_ai/util/_sandbox/docker/internal.py +3 -3
- {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.76.dist-info}/METADATA +5 -2
- {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.76.dist-info}/RECORD +42 -68
- {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.76.dist-info}/WHEEL +1 -1
- inspect_ai/tool/_tools/_web_browser/_resources/.pylintrc +0 -8
- inspect_ai/tool/_tools/_web_browser/_resources/.vscode/launch.json +0 -24
- inspect_ai/tool/_tools/_web_browser/_resources/.vscode/settings.json +0 -25
- inspect_ai/tool/_tools/_web_browser/_resources/Dockerfile +0 -22
- inspect_ai/tool/_tools/_web_browser/_resources/README.md +0 -63
- inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree.py +0 -71
- inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree_node.py +0 -323
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/__init__.py +0 -5
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/a11y.py +0 -279
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom.py +0 -9
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom_snapshot.py +0 -293
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/page.py +0 -94
- inspect_ai/tool/_tools/_web_browser/_resources/constants.py +0 -2
- inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.svg +0 -2
- inspect_ai/tool/_tools/_web_browser/_resources/mock_environment.py +0 -45
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_browser.py +0 -50
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_crawler.py +0 -48
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_page_crawler.py +0 -280
- inspect_ai/tool/_tools/_web_browser/_resources/pyproject.toml +0 -65
- inspect_ai/tool/_tools/_web_browser/_resources/rectangle.py +0 -64
- inspect_ai/tool/_tools/_web_browser/_resources/rpc_client_helpers.py +0 -146
- inspect_ai/tool/_tools/_web_browser/_resources/scale_factor.py +0 -64
- inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_tree_node.py +0 -180
- inspect_ai/tool/_tools/_web_browser/_resources/test_playwright_crawler.py +0 -99
- inspect_ai/tool/_tools/_web_browser/_resources/test_rectangle.py +0 -15
- inspect_ai/tool/_tools/_web_browser/_resources/test_web_client.py +0 -44
- inspect_ai/tool/_tools/_web_browser/_resources/web_browser_rpc_types.py +0 -39
- inspect_ai/tool/_tools/_web_browser/_resources/web_client.py +0 -214
- inspect_ai/tool/_tools/_web_browser/_resources/web_client_new_session.py +0 -35
- inspect_ai/tool/_tools/_web_browser/_resources/web_server.py +0 -192
- {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.76.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.76.dist-info/licenses}/LICENSE +0 -0
- {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.76.dist-info}/top_level.txt +0 -0
inspect_ai/_eval/evalset.py
CHANGED
@@ -35,7 +35,7 @@ from inspect_ai.model import (
|
|
35
35
|
from inspect_ai.model._generate_config import GenerateConfig
|
36
36
|
from inspect_ai.solver._solver import Solver, SolverSpec
|
37
37
|
from inspect_ai.util import DisplayType, SandboxEnvironmentType
|
38
|
-
from inspect_ai.util._display import init_display_type
|
38
|
+
from inspect_ai.util._display import display_type_initialized, init_display_type
|
39
39
|
|
40
40
|
from .eval import eval, eval_init
|
41
41
|
from .loader import resolve_task_args
|
@@ -234,7 +234,8 @@ def eval_set(
|
|
234
234
|
return results
|
235
235
|
|
236
236
|
# initialise display (otherwise eval_init will set it to full)
|
237
|
-
|
237
|
+
if not display_type_initialized():
|
238
|
+
display = init_display_type(display)
|
238
239
|
if display == "conversation":
|
239
240
|
raise RuntimeError("eval_set cannot be used with conversation display.")
|
240
241
|
|
inspect_ai/_eval/registry.py
CHANGED
@@ -75,12 +75,10 @@ def task_create(name: str, **kwargs: Any) -> Task:
|
|
75
75
|
task_params: list[str] = task_info.metadata["params"]
|
76
76
|
task_args: dict[str, Any] = {}
|
77
77
|
for param in kwargs.keys():
|
78
|
-
if param in task_params:
|
78
|
+
if param in task_params or "kwargs" in task_params:
|
79
79
|
task_args[param] = kwargs[param]
|
80
|
-
|
81
|
-
|
82
|
-
else:
|
83
|
-
logger.warning(f"param '{param}' not used by task '{name}'")
|
80
|
+
else:
|
81
|
+
logger.warning(f"param '{param}' not used by task '{name}'")
|
84
82
|
|
85
83
|
return cast(Task, registry_create("task", name, **task_args))
|
86
84
|
|
inspect_ai/_eval/run.py
CHANGED
@@ -4,6 +4,7 @@ import os
|
|
4
4
|
import sys
|
5
5
|
from typing import Awaitable, Callable, Set, cast
|
6
6
|
|
7
|
+
from inspect_ai._eval.task.task import Task
|
7
8
|
from inspect_ai._util.trace import trace_action
|
8
9
|
|
9
10
|
if sys.version_info < (3, 11):
|
@@ -81,6 +82,7 @@ async def eval_run(
|
|
81
82
|
eval_wd = os.getcwd()
|
82
83
|
|
83
84
|
# ensure sample ids
|
85
|
+
task: Task | None = None
|
84
86
|
for resolved_task in tasks:
|
85
87
|
# add sample ids to dataset if they aren't there (start at 1 not 0)
|
86
88
|
task = resolved_task.task
|
@@ -91,6 +93,8 @@ async def eval_run(
|
|
91
93
|
# Ensure sample ids are unique
|
92
94
|
ensure_unique_ids(task.dataset)
|
93
95
|
|
96
|
+
assert task, "Must encounter a task"
|
97
|
+
|
94
98
|
# run startup pass for the sandbox environments
|
95
99
|
shutdown_sandbox_environments: Callable[[], Awaitable[None]] | None = None
|
96
100
|
if has_sandbox:
|
inspect_ai/_eval/task/run.py
CHANGED
@@ -599,6 +599,10 @@ async def task_run_sample(
|
|
599
599
|
)
|
600
600
|
|
601
601
|
async with sandboxenv_cm:
|
602
|
+
timeout_cm: (
|
603
|
+
contextlib._GeneratorContextManager[anyio.CancelScope, None, None]
|
604
|
+
| contextlib.nullcontext[None]
|
605
|
+
) = contextlib.nullcontext()
|
602
606
|
try:
|
603
607
|
# update active sample with sandboxes now that we are initialised
|
604
608
|
active.sandboxes = await sandbox_connections()
|
inspect_ai/_util/logger.py
CHANGED
@@ -150,6 +150,9 @@ def init_logger(log_level: str | None, log_level_transcript: str | None = None)
|
|
150
150
|
transcript_levelno=transcript_levelno,
|
151
151
|
)
|
152
152
|
|
153
|
+
# set the global log level
|
154
|
+
getLogger().setLevel(log_level)
|
155
|
+
|
153
156
|
# set the log level for our package
|
154
157
|
getLogger(PKG_NAME).setLevel(capture_level)
|
155
158
|
getLogger(PKG_NAME).addHandler(_logHandler)
|
@@ -16461,44 +16461,44 @@ ul.jsondiffpatch-textdiff {
|
|
16461
16461
|
font-weight: 600;
|
16462
16462
|
padding-bottom: 0.3em;
|
16463
16463
|
}
|
16464
|
-
.
|
16464
|
+
._output_15urk_1 {
|
16465
16465
|
padding-top: 1em;
|
16466
16466
|
}
|
16467
16467
|
|
16468
|
-
.
|
16468
|
+
._container_15urk_5 {
|
16469
16469
|
margin: 0.5em 0;
|
16470
16470
|
width: 100%;
|
16471
16471
|
}
|
16472
16472
|
|
16473
|
-
.
|
16473
|
+
._all_15urk_10 {
|
16474
16474
|
display: grid;
|
16475
16475
|
grid-template-columns: 1fr 1fr 1fr;
|
16476
16476
|
column-gap: 1em;
|
16477
16477
|
}
|
16478
16478
|
|
16479
|
-
.
|
16479
|
+
._tableSelection_15urk_16 {
|
16480
16480
|
width: fit-content;
|
16481
16481
|
align-self: start;
|
16482
16482
|
justify-self: start;
|
16483
16483
|
}
|
16484
16484
|
|
16485
|
-
.
|
16485
|
+
._tools_15urk_22 {
|
16486
16486
|
grid-column: -1/1;
|
16487
16487
|
}
|
16488
16488
|
|
16489
|
-
.
|
16489
|
+
._codePre_15urk_26 {
|
16490
16490
|
background: var(--bs-light);
|
16491
16491
|
width: 100%;
|
16492
16492
|
padding: 0.5em;
|
16493
16493
|
border-radius: var(--bs-border-radius);
|
16494
16494
|
}
|
16495
16495
|
|
16496
|
-
.
|
16497
|
-
white-space: pre-wrap;
|
16498
|
-
word-wrap: anywhere;
|
16496
|
+
._code_15urk_26 {
|
16497
|
+
white-space: pre-wrap !important;
|
16498
|
+
word-wrap: anywhere !important;
|
16499
16499
|
}
|
16500
16500
|
|
16501
|
-
.
|
16501
|
+
._toolConfig_15urk_38 {
|
16502
16502
|
display: grid;
|
16503
16503
|
grid-template-columns: max-content auto;
|
16504
16504
|
column-gap: 1em;
|
@@ -17032,12 +17032,14 @@ div.ap-player div.ap-control-bar * {
|
|
17032
17032
|
div.ap-control-bar svg.ap-icon path {
|
17033
17033
|
fill: var(--term-color-foreground);
|
17034
17034
|
}
|
17035
|
-
div.ap-control-bar span.ap-
|
17035
|
+
div.ap-control-bar span.ap-button {
|
17036
17036
|
display: flex;
|
17037
17037
|
flex: 0 0 auto;
|
17038
17038
|
cursor: pointer;
|
17039
|
-
|
17039
|
+
}
|
17040
|
+
div.ap-control-bar span.ap-playback-button {
|
17040
17041
|
width: 12px;
|
17042
|
+
height: 12px;
|
17041
17043
|
padding: 10px;
|
17042
17044
|
}
|
17043
17045
|
div.ap-control-bar span.ap-playback-button svg {
|
@@ -17104,13 +17106,9 @@ div.ap-control-bar.ap-seekable .ap-progressbar .ap-bar {
|
|
17104
17106
|
cursor: pointer;
|
17105
17107
|
}
|
17106
17108
|
div.ap-control-bar .ap-fullscreen-button {
|
17107
|
-
display: block;
|
17108
|
-
flex: 0 0 auto;
|
17109
17109
|
width: 14px;
|
17110
17110
|
height: 14px;
|
17111
17111
|
padding: 9px;
|
17112
|
-
cursor: pointer;
|
17113
|
-
position: relative;
|
17114
17112
|
}
|
17115
17113
|
div.ap-control-bar .ap-fullscreen-button svg {
|
17116
17114
|
width: 14px;
|
@@ -17127,6 +17125,20 @@ div.ap-control-bar .ap-fullscreen-button .ap-tooltip {
|
|
17127
17125
|
left: initial;
|
17128
17126
|
transform: none;
|
17129
17127
|
}
|
17128
|
+
div.ap-control-bar .ap-kbd-button {
|
17129
|
+
height: 14px;
|
17130
|
+
padding: 9px;
|
17131
|
+
margin: 0 4px;
|
17132
|
+
}
|
17133
|
+
div.ap-control-bar .ap-kbd-button svg {
|
17134
|
+
width: 26px;
|
17135
|
+
height: 14px;
|
17136
|
+
}
|
17137
|
+
div.ap-control-bar .ap-kbd-button .ap-tooltip {
|
17138
|
+
right: 5px;
|
17139
|
+
left: initial;
|
17140
|
+
transform: none;
|
17141
|
+
}
|
17130
17142
|
div.ap-wrapper.ap-hud .ap-control-bar {
|
17131
17143
|
opacity: 1;
|
17132
17144
|
}
|