inspect-ai 0.3.82__py3-none-any.whl → 0.3.84__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/__init__.py +2 -1
- inspect_ai/_display/textual/app.py +14 -3
- inspect_ai/_display/textual/display.py +4 -0
- inspect_ai/_display/textual/widgets/samples.py +9 -3
- inspect_ai/_display/textual/widgets/task_detail.py +3 -4
- inspect_ai/_display/textual/widgets/tasks.py +17 -1
- inspect_ai/_display/textual/widgets/vscode.py +48 -0
- inspect_ai/_eval/eval.py +36 -24
- inspect_ai/_eval/evalset.py +17 -18
- inspect_ai/_eval/loader.py +34 -11
- inspect_ai/_eval/run.py +8 -13
- inspect_ai/_eval/score.py +13 -3
- inspect_ai/_eval/task/generate.py +8 -9
- inspect_ai/_eval/task/log.py +2 -0
- inspect_ai/_eval/task/task.py +23 -9
- inspect_ai/_util/file.py +13 -0
- inspect_ai/_util/json.py +2 -1
- inspect_ai/_util/registry.py +1 -0
- inspect_ai/_util/vscode.py +37 -0
- inspect_ai/_view/www/App.css +6 -0
- inspect_ai/_view/www/dist/assets/index.css +304 -128
- inspect_ai/_view/www/dist/assets/index.js +47495 -27519
- inspect_ai/_view/www/log-schema.json +124 -31
- inspect_ai/_view/www/package.json +3 -0
- inspect_ai/_view/www/src/App.tsx +12 -0
- inspect_ai/_view/www/src/appearance/icons.ts +1 -0
- inspect_ai/_view/www/src/components/Card.tsx +6 -4
- inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
- inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
- inspect_ai/_view/www/src/components/LiveVirtualList.tsx +1 -1
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +113 -23
- inspect_ai/_view/www/src/components/Modal.module.css +38 -0
- inspect_ai/_view/www/src/components/Modal.tsx +77 -0
- inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
- inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
- inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +7 -0
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +7 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +11 -34
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +6 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +2 -2
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +12 -0
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +2 -0
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +2 -0
- inspect_ai/_view/www/src/samples/chat/messages.ts +3 -1
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +1 -0
- inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +9 -3
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -11
- inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +2 -1
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +7 -1
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +25 -8
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +1 -1
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +11 -22
- inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
- inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
- inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
- inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +3 -3
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +25 -4
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +29 -2
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +0 -1
- inspect_ai/_view/www/src/state/hooks.ts +5 -3
- inspect_ai/_view/www/src/state/logPolling.ts +5 -1
- inspect_ai/_view/www/src/state/logSlice.ts +10 -0
- inspect_ai/_view/www/src/state/samplePolling.ts +4 -1
- inspect_ai/_view/www/src/state/sampleSlice.ts +13 -0
- inspect_ai/_view/www/src/types/log.d.ts +34 -26
- inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
- inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +18 -16
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -0
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +68 -71
- inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
- inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +1 -1
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +18 -0
- inspect_ai/_view/www/yarn.lock +94 -1
- inspect_ai/agent/__init__.py +36 -0
- inspect_ai/agent/_agent.py +268 -0
- inspect_ai/agent/_as_solver.py +72 -0
- inspect_ai/agent/_as_tool.py +122 -0
- inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
- inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
- inspect_ai/agent/_filter.py +46 -0
- inspect_ai/agent/_handoff.py +93 -0
- inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
- inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
- inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
- inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
- inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
- inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
- inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
- inspect_ai/agent/_react.py +241 -0
- inspect_ai/agent/_run.py +36 -0
- inspect_ai/agent/_types.py +81 -0
- inspect_ai/log/_log.py +11 -2
- inspect_ai/log/_transcript.py +13 -9
- inspect_ai/model/__init__.py +7 -1
- inspect_ai/model/_call_tools.py +256 -52
- inspect_ai/model/_chat_message.py +7 -4
- inspect_ai/model/_conversation.py +13 -62
- inspect_ai/model/_display.py +85 -0
- inspect_ai/model/_model.py +113 -14
- inspect_ai/model/_model_output.py +14 -9
- inspect_ai/model/_openai.py +16 -4
- inspect_ai/model/_openai_computer_use.py +162 -0
- inspect_ai/model/_openai_responses.py +319 -165
- inspect_ai/model/_providers/anthropic.py +20 -21
- inspect_ai/model/_providers/azureai.py +24 -13
- inspect_ai/model/_providers/bedrock.py +1 -7
- inspect_ai/model/_providers/cloudflare.py +3 -3
- inspect_ai/model/_providers/goodfire.py +2 -6
- inspect_ai/model/_providers/google.py +11 -10
- inspect_ai/model/_providers/groq.py +6 -3
- inspect_ai/model/_providers/hf.py +7 -3
- inspect_ai/model/_providers/mistral.py +7 -10
- inspect_ai/model/_providers/openai.py +47 -17
- inspect_ai/model/_providers/openai_o1.py +11 -4
- inspect_ai/model/_providers/openai_responses.py +12 -14
- inspect_ai/model/_providers/providers.py +2 -2
- inspect_ai/model/_providers/together.py +12 -2
- inspect_ai/model/_providers/util/chatapi.py +7 -2
- inspect_ai/model/_providers/util/hf_handler.py +4 -2
- inspect_ai/model/_providers/util/llama31.py +4 -2
- inspect_ai/model/_providers/vertex.py +11 -9
- inspect_ai/model/_providers/vllm.py +4 -4
- inspect_ai/scorer/__init__.py +2 -0
- inspect_ai/scorer/_metrics/__init__.py +2 -0
- inspect_ai/scorer/_metrics/grouped.py +84 -0
- inspect_ai/scorer/_score.py +26 -6
- inspect_ai/solver/__init__.py +2 -2
- inspect_ai/solver/_basic_agent.py +22 -9
- inspect_ai/solver/_bridge.py +31 -0
- inspect_ai/solver/_chain.py +20 -12
- inspect_ai/solver/_fork.py +5 -1
- inspect_ai/solver/_human_agent.py +52 -0
- inspect_ai/solver/_prompt.py +3 -1
- inspect_ai/solver/_run.py +59 -0
- inspect_ai/solver/_solver.py +14 -4
- inspect_ai/solver/_task_state.py +5 -3
- inspect_ai/tool/_tool_call.py +15 -8
- inspect_ai/tool/_tool_def.py +17 -12
- inspect_ai/tool/_tool_support_helpers.py +2 -2
- inspect_ai/tool/_tool_with.py +14 -11
- inspect_ai/tool/_tools/_bash_session.py +11 -2
- inspect_ai/tool/_tools/_computer/_common.py +18 -2
- inspect_ai/tool/_tools/_computer/_computer.py +18 -2
- inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
- inspect_ai/tool/_tools/_think.py +1 -1
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +100 -61
- inspect_ai/util/__init__.py +2 -0
- inspect_ai/util/_anyio.py +27 -0
- inspect_ai/util/_sandbox/__init__.py +2 -1
- inspect_ai/util/_sandbox/context.py +32 -7
- inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
- inspect_ai/util/_sandbox/docker/compose.py +2 -2
- inspect_ai/util/_sandbox/docker/docker.py +12 -1
- inspect_ai/util/_store_model.py +30 -7
- inspect_ai/util/_subprocess.py +13 -3
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/RECORD +179 -153
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -167
- /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/top_level.txt +0 -0
inspect_ai/_eval/task/task.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
from copy import deepcopy
|
2
1
|
from dataclasses import dataclass
|
3
2
|
from logging import getLogger
|
4
3
|
from typing import Any, Awaitable, Callable, Sequence, cast
|
@@ -9,6 +8,8 @@ from typing_extensions import TypedDict, Unpack
|
|
9
8
|
from inspect_ai._util.logger import warn_once
|
10
9
|
from inspect_ai._util.notgiven import NOT_GIVEN, NotGiven
|
11
10
|
from inspect_ai._util.registry import is_registry_object, registry_info
|
11
|
+
from inspect_ai.agent._agent import Agent, is_agent
|
12
|
+
from inspect_ai.agent._as_solver import as_solver
|
12
13
|
from inspect_ai.approval._policy import ApprovalPolicy, approval_policies_from_config
|
13
14
|
from inspect_ai.dataset import Dataset, MemoryDataset, Sample
|
14
15
|
from inspect_ai.log import EvalLog
|
@@ -47,7 +48,7 @@ class Task:
|
|
47
48
|
self,
|
48
49
|
dataset: Dataset | Sequence[Sample] | None = None,
|
49
50
|
setup: Solver | list[Solver] | None = None,
|
50
|
-
solver: Solver | list[Solver] = generate(),
|
51
|
+
solver: Solver | Agent | list[Solver] = generate(),
|
51
52
|
cleanup: Callable[[TaskState], Awaitable[None]] | None = None,
|
52
53
|
scorer: Scorer | list[Scorer] | None = None,
|
53
54
|
metrics: list[Metric] | dict[str, list[Metric]] | None = None,
|
@@ -158,6 +159,13 @@ class Task:
|
|
158
159
|
else:
|
159
160
|
return "task"
|
160
161
|
|
162
|
+
@property
|
163
|
+
def registry_name(self) -> str | None:
|
164
|
+
if is_registry_object(self):
|
165
|
+
return registry_info(self).name
|
166
|
+
else:
|
167
|
+
return None
|
168
|
+
|
161
169
|
@property
|
162
170
|
def attribs(self) -> dict[str, Any]:
|
163
171
|
if is_registry_object(self):
|
@@ -191,8 +199,12 @@ def task_with(
|
|
191
199
|
) -> Task:
|
192
200
|
"""Task adapted with alternate values for one or more options.
|
193
201
|
|
202
|
+
This function modifies the passed task in place and returns it.
|
203
|
+
If you want to create multiple variations of a single task using
|
204
|
+
`task_with()` you should create the underlying task multiple times.
|
205
|
+
|
194
206
|
Args:
|
195
|
-
task: Task to adapt
|
207
|
+
task: Task to adapt
|
196
208
|
dataset: Dataset to evaluate
|
197
209
|
setup: Setup step (always run even when the main `solver` is replaced).
|
198
210
|
solver: Solver or list of solvers. Defaults to generate(), a normal call to the model.
|
@@ -227,11 +239,8 @@ def task_with(
|
|
227
239
|
metadata: Additional metadata to associate with the task.
|
228
240
|
|
229
241
|
Returns:
|
230
|
-
Task:
|
242
|
+
Task: Passed `task` with modifications.
|
231
243
|
"""
|
232
|
-
# deep copy the task
|
233
|
-
task = deepcopy(task)
|
234
|
-
|
235
244
|
if not isinstance(dataset, NotGiven):
|
236
245
|
task.dataset = resolve_dataset(dataset)
|
237
246
|
if not isinstance(setup, NotGiven):
|
@@ -340,8 +349,13 @@ def resolve_dataset(dataset: Dataset | Sequence[Sample] | None) -> Dataset:
|
|
340
349
|
return dataset if isinstance(dataset, Dataset) else MemoryDataset(list(dataset))
|
341
350
|
|
342
351
|
|
343
|
-
def resolve_solver(solver: Solver | list[Solver]) -> Solver:
|
344
|
-
|
352
|
+
def resolve_solver(solver: Solver | Agent | list[Solver]) -> Solver:
|
353
|
+
if isinstance(solver, list):
|
354
|
+
return chain(solver)
|
355
|
+
elif is_agent(solver):
|
356
|
+
return as_solver(solver)
|
357
|
+
else:
|
358
|
+
return cast(Solver, solver)
|
345
359
|
|
346
360
|
|
347
361
|
def resolve_model(model: str | Model | None) -> Model | None:
|
inspect_ai/_util/file.py
CHANGED
@@ -322,6 +322,19 @@ def absolute_file_path(file: str) -> str:
|
|
322
322
|
return file
|
323
323
|
|
324
324
|
|
325
|
+
def to_uri(path_or_uri: str) -> str:
|
326
|
+
# Check if it's already a URI
|
327
|
+
parsed = urlparse(path_or_uri)
|
328
|
+
|
329
|
+
if parsed.scheme:
|
330
|
+
# Already has a scheme, return as is
|
331
|
+
return path_or_uri
|
332
|
+
|
333
|
+
# It's a file path, convert to URI
|
334
|
+
path_obj = Path(path_or_uri).absolute()
|
335
|
+
return path_obj.as_uri()
|
336
|
+
|
337
|
+
|
325
338
|
def default_fs_options(file: str) -> dict[str, Any]:
|
326
339
|
scheme = urlparse(file).scheme
|
327
340
|
if (
|
inspect_ai/_util/json.py
CHANGED
@@ -8,7 +8,8 @@ import jsonpatch
|
|
8
8
|
from pydantic import BaseModel, Field, JsonValue
|
9
9
|
from pydantic_core import to_json, to_jsonable_python
|
10
10
|
|
11
|
-
|
11
|
+
JSONType = Literal["string", "integer", "number", "boolean", "array", "object", "null"]
|
12
|
+
"""Valid types within JSON schema."""
|
12
13
|
|
13
14
|
|
14
15
|
def jsonable_python(x: Any) -> Any:
|
inspect_ai/_util/registry.py
CHANGED
inspect_ai/_util/vscode.py
CHANGED
@@ -1,13 +1,19 @@
|
|
1
1
|
import os
|
2
|
+
from logging import getLogger
|
2
3
|
from pathlib import Path
|
3
4
|
from typing import Any
|
4
5
|
|
5
6
|
from pydantic import BaseModel, Field
|
6
7
|
from pydantic_core import to_json
|
8
|
+
from semver import Version
|
7
9
|
from shortuuid import uuid
|
8
10
|
|
9
11
|
from .appdirs import inspect_data_dir
|
10
12
|
|
13
|
+
logger = getLogger(__name__)
|
14
|
+
|
15
|
+
EXTENSION_COMMAND_VERSIONS = {"inspect.openLogViewer": Version(0, 3, 61)}
|
16
|
+
|
11
17
|
|
12
18
|
class VSCodeCommand(BaseModel):
|
13
19
|
command: str
|
@@ -34,6 +40,25 @@ def can_execute_vscode_commands() -> bool:
|
|
34
40
|
return vs_code_commands_dir() is not None
|
35
41
|
|
36
42
|
|
43
|
+
def can_execute_vscode_command(command: str) -> bool:
|
44
|
+
if not can_execute_vscode_commands():
|
45
|
+
return False
|
46
|
+
|
47
|
+
required_version = EXTENSION_COMMAND_VERSIONS.get(command)
|
48
|
+
if required_version is None:
|
49
|
+
return True
|
50
|
+
else:
|
51
|
+
return has_vscode_version(required_version)
|
52
|
+
|
53
|
+
|
54
|
+
def has_vscode_version(required_version: Version) -> bool:
|
55
|
+
current_version = vscode_extension_version()
|
56
|
+
if current_version is None:
|
57
|
+
return False
|
58
|
+
else:
|
59
|
+
return current_version.is_compatible(required_version)
|
60
|
+
|
61
|
+
|
37
62
|
def vs_code_commands_dir() -> Path | None:
|
38
63
|
workspace_id = vscode_workspace_id()
|
39
64
|
if workspace_id:
|
@@ -49,3 +74,15 @@ def vs_code_commands_dir() -> Path | None:
|
|
49
74
|
|
50
75
|
def vscode_workspace_id() -> str | None:
|
51
76
|
return os.environ.get("INSPECT_WORKSPACE_ID", None)
|
77
|
+
|
78
|
+
|
79
|
+
def vscode_extension_version() -> Version | None:
|
80
|
+
version = os.environ.get("INSPECT_VSCODE_EXT_VERSION", None)
|
81
|
+
if version is not None:
|
82
|
+
try:
|
83
|
+
return Version.parse(version)
|
84
|
+
except Exception:
|
85
|
+
logger.warning(f"Invalid Inspect vscode extension version: {version}")
|
86
|
+
return None
|
87
|
+
else:
|
88
|
+
return None
|
inspect_ai/_view/www/App.css
CHANGED
@@ -31,6 +31,10 @@
|
|
31
31
|
--inspect-font-size-base: 0.9rem;
|
32
32
|
--inspect-font-size-small: 0.8rem;
|
33
33
|
--inspect-font-size-smaller: 0.8rem;
|
34
|
+
|
35
|
+
/* Inspect Glass */
|
36
|
+
--inspect-glass-color: #000000;
|
37
|
+
--inspect-glass-opacity: 0.3;
|
34
38
|
}
|
35
39
|
|
36
40
|
body:not([class^="vscode-"]) button {
|
@@ -154,6 +158,8 @@ body[class^="vscode-"] {
|
|
154
158
|
--inspect-input-border: var(--vscode-input-border);
|
155
159
|
--inspect-diff-add-color: var(--vscode-diffEditor-insertedTextBackground);
|
156
160
|
--inspect-diff-remove-color: var(--vscode-diffEditor-removedTextBackground);
|
161
|
+
--inspect-glass-color: var(--vscode-editor-foreground);
|
162
|
+
--inspect-glass-opacity: 0.15;
|
157
163
|
}
|
158
164
|
|
159
165
|
html.vscode {
|