inspect-ai 0.3.96__py3-none-any.whl → 0.3.98__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_eval/eval.py +10 -2
- inspect_ai/_eval/run.py +6 -1
- inspect_ai/_eval/task/util.py +32 -3
- inspect_ai/_util/registry.py +7 -0
- inspect_ai/_util/timer.py +13 -0
- inspect_ai/_view/www/dist/assets/index.css +275 -195
- inspect_ai/_view/www/dist/assets/index.js +8568 -7376
- inspect_ai/_view/www/src/app/App.css +1 -0
- inspect_ai/_view/www/src/app/App.tsx +27 -10
- inspect_ai/_view/www/src/app/appearance/icons.ts +5 -0
- inspect_ai/_view/www/src/app/content/RecordTree.module.css +22 -0
- inspect_ai/_view/www/src/app/content/RecordTree.tsx +370 -0
- inspect_ai/_view/www/src/app/content/RenderedContent.module.css +5 -0
- inspect_ai/_view/www/src/app/content/RenderedContent.tsx +32 -19
- inspect_ai/_view/www/src/app/content/record_processors/store.ts +101 -0
- inspect_ai/_view/www/src/app/content/record_processors/types.ts +3 -0
- inspect_ai/_view/www/src/app/content/types.ts +5 -0
- inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -0
- inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +35 -28
- inspect_ai/_view/www/src/app/log-view/LogViewLayout.tsx +1 -8
- inspect_ai/_view/www/src/app/log-view/navbar/PrimaryBar.tsx +2 -4
- inspect_ai/_view/www/src/app/log-view/navbar/ResultsPanel.tsx +13 -3
- inspect_ai/_view/www/src/app/log-view/navbar/ScoreGrid.module.css +15 -0
- inspect_ai/_view/www/src/app/log-view/navbar/ScoreGrid.tsx +14 -10
- inspect_ai/_view/www/src/app/log-view/tabs/InfoTab.tsx +9 -3
- inspect_ai/_view/www/src/app/log-view/tabs/JsonTab.tsx +1 -3
- inspect_ai/_view/www/src/app/log-view/tabs/SamplesTab.tsx +8 -2
- inspect_ai/_view/www/src/app/log-view/types.ts +1 -0
- inspect_ai/_view/www/src/app/plan/ModelCard.module.css +7 -0
- inspect_ai/_view/www/src/app/plan/ModelCard.tsx +5 -2
- inspect_ai/_view/www/src/app/plan/PlanCard.tsx +13 -8
- inspect_ai/_view/www/src/app/routing/navigationHooks.ts +63 -8
- inspect_ai/_view/www/src/app/routing/url.ts +45 -0
- inspect_ai/_view/www/src/app/samples/InlineSampleDisplay.module.css +2 -1
- inspect_ai/_view/www/src/app/samples/InlineSampleDisplay.tsx +15 -8
- inspect_ai/_view/www/src/app/samples/SampleDialog.module.css +3 -0
- inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +16 -5
- inspect_ai/_view/www/src/app/samples/SampleDisplay.module.css +9 -1
- inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +68 -31
- inspect_ai/_view/www/src/app/samples/chat/ChatMessage.module.css +12 -7
- inspect_ai/_view/www/src/app/samples/chat/ChatMessage.tsx +17 -5
- inspect_ai/_view/www/src/app/samples/chat/ChatMessageRow.module.css +9 -0
- inspect_ai/_view/www/src/app/samples/chat/ChatMessageRow.tsx +48 -18
- inspect_ai/_view/www/src/app/samples/chat/ChatView.tsx +0 -1
- inspect_ai/_view/www/src/app/samples/chat/ChatViewVirtualList.module.css +4 -0
- inspect_ai/_view/www/src/app/samples/chat/ChatViewVirtualList.tsx +41 -1
- inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -0
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.module.css +0 -3
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolInput.module.css +1 -1
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolOutput.module.css +1 -1
- inspect_ai/_view/www/src/app/samples/descriptor/score/NumericScoreDescriptor.tsx +5 -1
- inspect_ai/_view/www/src/app/samples/descriptor/score/PassFailScoreDescriptor.tsx +11 -6
- inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +7 -0
- inspect_ai/_view/www/src/app/samples/list/SampleRow.tsx +5 -18
- inspect_ai/_view/www/src/app/samples/sample-tools/SortFilter.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/scores/SampleScoresGrid.tsx +18 -5
- inspect_ai/_view/www/src/app/samples/scores/SampleScoresView.module.css +0 -6
- inspect_ai/_view/www/src/app/samples/scores/SampleScoresView.tsx +4 -1
- inspect_ai/_view/www/src/app/samples/transcript/ApprovalEventView.tsx +4 -2
- inspect_ai/_view/www/src/app/samples/transcript/ErrorEventView.tsx +6 -4
- inspect_ai/_view/www/src/app/samples/transcript/InfoEventView.module.css +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/InfoEventView.tsx +13 -6
- inspect_ai/_view/www/src/app/samples/transcript/InputEventView.tsx +6 -4
- inspect_ai/_view/www/src/app/samples/transcript/LoggerEventView.tsx +4 -2
- inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +11 -8
- inspect_ai/_view/www/src/app/samples/transcript/SampleInitEventView.tsx +14 -8
- inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +13 -8
- inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.tsx +25 -16
- inspect_ai/_view/www/src/app/samples/transcript/ScoreEventView.tsx +7 -5
- inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +11 -28
- inspect_ai/_view/www/src/app/samples/transcript/StepEventView.tsx +12 -20
- inspect_ai/_view/www/src/app/samples/transcript/SubtaskEventView.tsx +12 -31
- inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +25 -29
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualList.tsx +297 -0
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +0 -8
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.tsx +43 -25
- inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.module.css +43 -0
- inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +109 -43
- inspect_ai/_view/www/src/app/samples/transcript/state/StateEventView.tsx +19 -8
- inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +128 -60
- inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +14 -4
- inspect_ai/_view/www/src/app/samples/transcript/types.ts +6 -4
- inspect_ai/_view/www/src/app/types.ts +12 -1
- inspect_ai/_view/www/src/components/Card.css +6 -3
- inspect_ai/_view/www/src/components/Card.tsx +15 -2
- inspect_ai/_view/www/src/components/CopyButton.tsx +4 -6
- inspect_ai/_view/www/src/components/ExpandablePanel.module.css +20 -14
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +17 -22
- inspect_ai/_view/www/src/components/LargeModal.tsx +5 -1
- inspect_ai/_view/www/src/components/LiveVirtualList.tsx +25 -1
- inspect_ai/_view/www/src/components/MarkdownDiv.css +4 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +2 -2
- inspect_ai/_view/www/src/components/TabSet.module.css +6 -1
- inspect_ai/_view/www/src/components/TabSet.tsx +8 -2
- inspect_ai/_view/www/src/state/hooks.ts +83 -13
- inspect_ai/_view/www/src/state/logPolling.ts +2 -2
- inspect_ai/_view/www/src/state/logSlice.ts +1 -2
- inspect_ai/_view/www/src/state/logsSlice.ts +9 -9
- inspect_ai/_view/www/src/state/samplePolling.ts +1 -1
- inspect_ai/_view/www/src/state/sampleSlice.ts +134 -7
- inspect_ai/_view/www/src/state/scoring.ts +1 -1
- inspect_ai/_view/www/src/state/scrolling.ts +39 -6
- inspect_ai/_view/www/src/state/store.ts +5 -0
- inspect_ai/_view/www/src/state/store_filter.ts +47 -44
- inspect_ai/_view/www/src/utils/debugging.ts +95 -0
- inspect_ai/_view/www/src/utils/format.ts +2 -2
- inspect_ai/_view/www/src/utils/json.ts +29 -0
- inspect_ai/agent/__init__.py +2 -1
- inspect_ai/agent/_agent.py +12 -0
- inspect_ai/agent/_react.py +184 -48
- inspect_ai/agent/_types.py +14 -1
- inspect_ai/analysis/beta/__init__.py +0 -2
- inspect_ai/analysis/beta/_dataframe/columns.py +11 -16
- inspect_ai/analysis/beta/_dataframe/evals/table.py +65 -40
- inspect_ai/analysis/beta/_dataframe/events/table.py +24 -36
- inspect_ai/analysis/beta/_dataframe/messages/table.py +24 -15
- inspect_ai/analysis/beta/_dataframe/progress.py +35 -5
- inspect_ai/analysis/beta/_dataframe/record.py +13 -9
- inspect_ai/analysis/beta/_dataframe/samples/columns.py +1 -1
- inspect_ai/analysis/beta/_dataframe/samples/table.py +156 -46
- inspect_ai/analysis/beta/_dataframe/util.py +14 -12
- inspect_ai/dataset/_dataset.py +0 -1
- inspect_ai/model/_call_tools.py +1 -1
- inspect_ai/model/_providers/anthropic.py +18 -5
- inspect_ai/model/_providers/azureai.py +7 -2
- inspect_ai/model/_providers/google.py +6 -0
- inspect_ai/model/_providers/util/llama31.py +3 -3
- {inspect_ai-0.3.96.dist-info → inspect_ai-0.3.98.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.96.dist-info → inspect_ai-0.3.98.dist-info}/RECORD +134 -129
- {inspect_ai-0.3.96.dist-info → inspect_ai-0.3.98.dist-info}/WHEEL +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptView.module.css +0 -48
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptView.tsx +0 -276
- {inspect_ai-0.3.96.dist-info → inspect_ai-0.3.98.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.96.dist-info → inspect_ai-0.3.98.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.96.dist-info → inspect_ai-0.3.98.dist-info}/top_level.txt +0 -0
inspect_ai/_eval/eval.py
CHANGED
@@ -28,7 +28,7 @@ from inspect_ai._util.error import PrerequisiteError
|
|
28
28
|
from inspect_ai._util.file import absolute_file_path
|
29
29
|
from inspect_ai._util.logger import warn_once
|
30
30
|
from inspect_ai._util.platform import platform_init
|
31
|
-
from inspect_ai._util.registry import registry_lookup
|
31
|
+
from inspect_ai._util.registry import registry_lookup, registry_package_name
|
32
32
|
from inspect_ai.approval._apply import init_tool_approval
|
33
33
|
from inspect_ai.approval._policy import (
|
34
34
|
ApprovalPolicy,
|
@@ -770,7 +770,15 @@ async def eval_retry_async(
|
|
770
770
|
task = f"{task_file}@{task_name}"
|
771
771
|
else:
|
772
772
|
if registry_lookup("task", task_name) is None:
|
773
|
-
|
773
|
+
# if this object is in a package then let the user know
|
774
|
+
# that they need to register it to work with eval-retry
|
775
|
+
package_name = registry_package_name(task_name)
|
776
|
+
if package_name is not None:
|
777
|
+
raise FileNotFoundError(
|
778
|
+
f"Task '{task_name}' is located in package '{package_name}' but has not been registered so cannot be retried. See https://inspect.aisi.org.uk/tasks.html#packaging for additional details on registering tasks in packages."
|
779
|
+
)
|
780
|
+
else:
|
781
|
+
raise FileNotFoundError(f"Task '{task_name}' not found.")
|
774
782
|
task = task_name
|
775
783
|
|
776
784
|
# see if there is solver spec in the eval log
|
inspect_ai/_eval/run.py
CHANGED
@@ -475,7 +475,12 @@ async def startup_sandbox_environments(
|
|
475
475
|
sandboxenvs: Set[TaskSandboxEnvironment] = set()
|
476
476
|
for task in tasks:
|
477
477
|
# resolve each sample and add to sandboxenvs
|
478
|
-
|
478
|
+
resolved_task_sample_ids = resolve_task_sample_ids(
|
479
|
+
task.task.name, config.sample_id
|
480
|
+
)
|
481
|
+
dataset = slice_dataset(
|
482
|
+
task.task.dataset, config.limit, resolved_task_sample_ids
|
483
|
+
)
|
479
484
|
for sample in dataset:
|
480
485
|
sandbox = await resolve_sandbox_for_task_and_sample(
|
481
486
|
eval_sandbox, task.task, sample
|
inspect_ai/_eval/task/util.py
CHANGED
@@ -1,7 +1,11 @@
|
|
1
1
|
import os
|
2
|
+
import reprlib
|
2
3
|
from copy import deepcopy
|
4
|
+
from logging import getLogger
|
3
5
|
from typing import cast
|
4
6
|
|
7
|
+
from inspect_ai._util.error import PrerequisiteError
|
8
|
+
from inspect_ai._util.logger import warn_once
|
5
9
|
from inspect_ai._util.path import cwd_relative_path
|
6
10
|
from inspect_ai.dataset import Sample
|
7
11
|
from inspect_ai.dataset._dataset import Dataset
|
@@ -10,6 +14,8 @@ from inspect_ai.model import ChatMessage, ChatMessageUser
|
|
10
14
|
from ..task import Task
|
11
15
|
from .constants import TASK_FILE_ATTR, TASK_RUN_DIR_ATTR
|
12
16
|
|
17
|
+
logger = getLogger(__name__)
|
18
|
+
|
13
19
|
|
14
20
|
def sample_messages(sample: Sample) -> list[ChatMessage]:
|
15
21
|
if isinstance(sample.input, str):
|
@@ -47,9 +53,32 @@ def slice_dataset(
|
|
47
53
|
return id if isinstance(id, str) else str(id).zfill(20)
|
48
54
|
|
49
55
|
if sample_id is not None:
|
50
|
-
|
51
|
-
|
52
|
-
|
56
|
+
# reduce to list of normalized sample ids
|
57
|
+
sample_ids = sample_id if isinstance(sample_id, list) else [sample_id]
|
58
|
+
sample_id = [normalise(id) for id in sample_ids]
|
59
|
+
|
60
|
+
# validate all the sample ids and warn if they aren't in the dataset
|
61
|
+
all_sample_ids_raw = [sample.id for sample in dataset]
|
62
|
+
all_sample_ids = [normalise(id) for id in all_sample_ids_raw]
|
63
|
+
for id in sample_id:
|
64
|
+
if id not in all_sample_ids:
|
65
|
+
warn_once(
|
66
|
+
logger, f"sample id '{id}' not found in dataset '{dataset.name}'."
|
67
|
+
)
|
68
|
+
|
69
|
+
# filter the dataset
|
70
|
+
filtered = dataset.filter(lambda sample: normalise(sample.id) in sample_id)
|
71
|
+
|
72
|
+
# raise error if we got no hits
|
73
|
+
if len(filtered) == 0:
|
74
|
+
filter = ",".join([str(id) for id in sample_id])
|
75
|
+
r = reprlib.Repr()
|
76
|
+
r.maxlist = 8
|
77
|
+
raise PrerequisiteError(
|
78
|
+
f"No matches in dataset '{dataset.name}' for sample_id filter '{filter}'\n({dataset.name} ids: {r.repr(all_sample_ids_raw)})"
|
79
|
+
)
|
80
|
+
|
81
|
+
return filtered
|
53
82
|
else:
|
54
83
|
dataset_limit = (
|
55
84
|
slice(0, len(dataset))
|
inspect_ai/_util/registry.py
CHANGED
@@ -183,6 +183,13 @@ def registry_lookup(type: RegistryType, name: str) -> object | None:
|
|
183
183
|
return o
|
184
184
|
|
185
185
|
|
186
|
+
def registry_package_name(name: str) -> str | None:
|
187
|
+
if name.find("/") != -1 and name.find(".") == -1:
|
188
|
+
return name.split("/")[0]
|
189
|
+
else:
|
190
|
+
return None
|
191
|
+
|
192
|
+
|
186
193
|
def registry_find(predicate: Callable[[RegistryInfo], bool]) -> list[object]:
|
187
194
|
r"""Find objects in the registry that match the passed predicate.
|
188
195
|
|
inspect_ai/_util/timer.py
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
import time
|
2
|
+
from contextlib import contextmanager
|
3
|
+
from typing import Iterator
|
4
|
+
|
5
|
+
|
6
|
+
@contextmanager
|
7
|
+
def execution_timer(name: str | None = None) -> Iterator[None]:
|
8
|
+
start_time = time.perf_counter()
|
9
|
+
yield
|
10
|
+
end_time = time.perf_counter()
|
11
|
+
print(
|
12
|
+
f"{name if name else ''} execution time: {end_time - start_time:.6f} seconds".strip()
|
13
|
+
)
|