inspect-ai 0.3.95__py3-none-any.whl → 0.3.97__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_eval/eval.py +10 -2
- inspect_ai/_eval/task/util.py +32 -3
- inspect_ai/_util/local_server.py +16 -0
- inspect_ai/_util/registry.py +7 -0
- inspect_ai/_util/timer.py +13 -0
- inspect_ai/_view/www/dist/assets/index.css +275 -195
- inspect_ai/_view/www/dist/assets/index.js +8568 -7376
- inspect_ai/_view/www/src/app/App.css +1 -0
- inspect_ai/_view/www/src/app/App.tsx +27 -10
- inspect_ai/_view/www/src/app/appearance/icons.ts +5 -0
- inspect_ai/_view/www/src/app/content/RecordTree.module.css +22 -0
- inspect_ai/_view/www/src/app/content/RecordTree.tsx +370 -0
- inspect_ai/_view/www/src/app/content/RenderedContent.module.css +5 -0
- inspect_ai/_view/www/src/app/content/RenderedContent.tsx +32 -19
- inspect_ai/_view/www/src/app/content/record_processors/store.ts +101 -0
- inspect_ai/_view/www/src/app/content/record_processors/types.ts +3 -0
- inspect_ai/_view/www/src/app/content/types.ts +5 -0
- inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -0
- inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +35 -28
- inspect_ai/_view/www/src/app/log-view/LogViewLayout.tsx +1 -8
- inspect_ai/_view/www/src/app/log-view/navbar/PrimaryBar.tsx +2 -4
- inspect_ai/_view/www/src/app/log-view/navbar/ResultsPanel.tsx +13 -3
- inspect_ai/_view/www/src/app/log-view/navbar/ScoreGrid.module.css +15 -0
- inspect_ai/_view/www/src/app/log-view/navbar/ScoreGrid.tsx +14 -10
- inspect_ai/_view/www/src/app/log-view/tabs/InfoTab.tsx +9 -3
- inspect_ai/_view/www/src/app/log-view/tabs/JsonTab.tsx +1 -3
- inspect_ai/_view/www/src/app/log-view/tabs/SamplesTab.tsx +8 -2
- inspect_ai/_view/www/src/app/log-view/types.ts +1 -0
- inspect_ai/_view/www/src/app/plan/ModelCard.module.css +7 -0
- inspect_ai/_view/www/src/app/plan/ModelCard.tsx +5 -2
- inspect_ai/_view/www/src/app/plan/PlanCard.tsx +13 -8
- inspect_ai/_view/www/src/app/routing/navigationHooks.ts +63 -8
- inspect_ai/_view/www/src/app/routing/url.ts +45 -0
- inspect_ai/_view/www/src/app/samples/InlineSampleDisplay.module.css +2 -1
- inspect_ai/_view/www/src/app/samples/InlineSampleDisplay.tsx +15 -8
- inspect_ai/_view/www/src/app/samples/SampleDialog.module.css +3 -0
- inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +16 -5
- inspect_ai/_view/www/src/app/samples/SampleDisplay.module.css +9 -1
- inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +68 -31
- inspect_ai/_view/www/src/app/samples/chat/ChatMessage.module.css +12 -7
- inspect_ai/_view/www/src/app/samples/chat/ChatMessage.tsx +17 -5
- inspect_ai/_view/www/src/app/samples/chat/ChatMessageRow.module.css +9 -0
- inspect_ai/_view/www/src/app/samples/chat/ChatMessageRow.tsx +48 -18
- inspect_ai/_view/www/src/app/samples/chat/ChatView.tsx +0 -1
- inspect_ai/_view/www/src/app/samples/chat/ChatViewVirtualList.module.css +4 -0
- inspect_ai/_view/www/src/app/samples/chat/ChatViewVirtualList.tsx +41 -1
- inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -0
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.module.css +0 -3
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolInput.module.css +1 -1
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolOutput.module.css +1 -1
- inspect_ai/_view/www/src/app/samples/descriptor/score/NumericScoreDescriptor.tsx +5 -1
- inspect_ai/_view/www/src/app/samples/descriptor/score/PassFailScoreDescriptor.tsx +11 -6
- inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +7 -0
- inspect_ai/_view/www/src/app/samples/list/SampleRow.tsx +5 -18
- inspect_ai/_view/www/src/app/samples/sample-tools/SortFilter.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/scores/SampleScoresGrid.tsx +18 -5
- inspect_ai/_view/www/src/app/samples/scores/SampleScoresView.module.css +0 -6
- inspect_ai/_view/www/src/app/samples/scores/SampleScoresView.tsx +4 -1
- inspect_ai/_view/www/src/app/samples/transcript/ApprovalEventView.tsx +4 -2
- inspect_ai/_view/www/src/app/samples/transcript/ErrorEventView.tsx +6 -4
- inspect_ai/_view/www/src/app/samples/transcript/InfoEventView.module.css +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/InfoEventView.tsx +13 -6
- inspect_ai/_view/www/src/app/samples/transcript/InputEventView.tsx +6 -4
- inspect_ai/_view/www/src/app/samples/transcript/LoggerEventView.tsx +4 -2
- inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +11 -8
- inspect_ai/_view/www/src/app/samples/transcript/SampleInitEventView.tsx +14 -8
- inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +13 -8
- inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.tsx +25 -16
- inspect_ai/_view/www/src/app/samples/transcript/ScoreEventView.tsx +7 -5
- inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +11 -28
- inspect_ai/_view/www/src/app/samples/transcript/StepEventView.tsx +12 -20
- inspect_ai/_view/www/src/app/samples/transcript/SubtaskEventView.tsx +12 -31
- inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +25 -29
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualList.tsx +297 -0
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +0 -8
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.tsx +43 -25
- inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.module.css +43 -0
- inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +109 -43
- inspect_ai/_view/www/src/app/samples/transcript/state/StateEventView.tsx +19 -8
- inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +128 -60
- inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +14 -4
- inspect_ai/_view/www/src/app/samples/transcript/types.ts +6 -4
- inspect_ai/_view/www/src/app/types.ts +12 -1
- inspect_ai/_view/www/src/components/Card.css +6 -3
- inspect_ai/_view/www/src/components/Card.tsx +15 -2
- inspect_ai/_view/www/src/components/CopyButton.tsx +4 -6
- inspect_ai/_view/www/src/components/ExpandablePanel.module.css +20 -14
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +17 -22
- inspect_ai/_view/www/src/components/LargeModal.tsx +5 -1
- inspect_ai/_view/www/src/components/LiveVirtualList.tsx +25 -1
- inspect_ai/_view/www/src/components/MarkdownDiv.css +4 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +2 -2
- inspect_ai/_view/www/src/components/TabSet.module.css +6 -1
- inspect_ai/_view/www/src/components/TabSet.tsx +8 -2
- inspect_ai/_view/www/src/state/hooks.ts +83 -13
- inspect_ai/_view/www/src/state/logPolling.ts +2 -2
- inspect_ai/_view/www/src/state/logSlice.ts +1 -2
- inspect_ai/_view/www/src/state/logsSlice.ts +9 -9
- inspect_ai/_view/www/src/state/samplePolling.ts +1 -1
- inspect_ai/_view/www/src/state/sampleSlice.ts +134 -7
- inspect_ai/_view/www/src/state/scoring.ts +1 -1
- inspect_ai/_view/www/src/state/scrolling.ts +39 -6
- inspect_ai/_view/www/src/state/store.ts +5 -0
- inspect_ai/_view/www/src/state/store_filter.ts +47 -44
- inspect_ai/_view/www/src/utils/debugging.ts +95 -0
- inspect_ai/_view/www/src/utils/format.ts +2 -2
- inspect_ai/_view/www/src/utils/json.ts +29 -0
- inspect_ai/agent/__init__.py +2 -1
- inspect_ai/agent/_agent.py +12 -0
- inspect_ai/agent/_react.py +184 -48
- inspect_ai/agent/_types.py +15 -2
- inspect_ai/analysis/beta/__init__.py +11 -3
- inspect_ai/analysis/beta/_dataframe/columns.py +11 -16
- inspect_ai/analysis/beta/_dataframe/evals/table.py +101 -39
- inspect_ai/analysis/beta/_dataframe/events/columns.py +50 -0
- inspect_ai/analysis/beta/_dataframe/events/extract.py +26 -0
- inspect_ai/analysis/beta/_dataframe/events/table.py +77 -3
- inspect_ai/analysis/beta/_dataframe/extract.py +44 -25
- inspect_ai/analysis/beta/_dataframe/messages/columns.py +1 -1
- inspect_ai/analysis/beta/_dataframe/messages/table.py +30 -29
- inspect_ai/analysis/beta/_dataframe/progress.py +56 -0
- inspect_ai/analysis/beta/_dataframe/record.py +13 -9
- inspect_ai/analysis/beta/_dataframe/samples/columns.py +8 -4
- inspect_ai/analysis/beta/_dataframe/samples/extract.py +5 -33
- inspect_ai/analysis/beta/_dataframe/samples/table.py +211 -60
- inspect_ai/analysis/beta/_dataframe/util.py +33 -28
- inspect_ai/log/_file.py +9 -2
- inspect_ai/model/_call_tools.py +1 -1
- inspect_ai/model/_providers/anthropic.py +18 -5
- inspect_ai/model/_providers/azureai.py +7 -2
- inspect_ai/model/_providers/util/llama31.py +3 -3
- inspect_ai/solver/_task_state.py +1 -1
- inspect_ai/tool/_mcp/_sandbox.py +17 -14
- {inspect_ai-0.3.95.dist-info → inspect_ai-0.3.97.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.95.dist-info → inspect_ai-0.3.97.dist-info}/RECORD +140 -133
- {inspect_ai-0.3.95.dist-info → inspect_ai-0.3.97.dist-info}/WHEEL +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptView.module.css +0 -48
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptView.tsx +0 -276
- {inspect_ai-0.3.95.dist-info → inspect_ai-0.3.97.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.95.dist-info → inspect_ai-0.3.97.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.95.dist-info → inspect_ai-0.3.97.dist-info}/top_level.txt +0 -0
inspect_ai/tool/_mcp/_sandbox.py
CHANGED
@@ -5,6 +5,7 @@ from typing import TextIO
|
|
5
5
|
import anyio
|
6
6
|
from anyio.streams.memory import MemoryObjectReceiveStream, MemoryObjectSendStream
|
7
7
|
from mcp import JSONRPCRequest, StdioServerParameters
|
8
|
+
from mcp.shared.message import SessionMessage
|
8
9
|
from mcp.types import JSONRPCMessage, JSONRPCNotification
|
9
10
|
|
10
11
|
from inspect_ai.tool._tool_support_helpers import (
|
@@ -36,12 +37,12 @@ async def sandbox_client( # type: ignore
|
|
36
37
|
)
|
37
38
|
|
38
39
|
# read_stream is remote process's stdout
|
39
|
-
read_stream: MemoryObjectReceiveStream[
|
40
|
-
read_stream_writer: MemoryObjectSendStream[
|
40
|
+
read_stream: MemoryObjectReceiveStream[SessionMessage | Exception]
|
41
|
+
read_stream_writer: MemoryObjectSendStream[SessionMessage | Exception]
|
41
42
|
|
42
43
|
# write_stream is remote process's stdin
|
43
|
-
write_stream: MemoryObjectSendStream[
|
44
|
-
write_stream_reader: MemoryObjectReceiveStream[
|
44
|
+
write_stream: MemoryObjectSendStream[SessionMessage]
|
45
|
+
write_stream_reader: MemoryObjectReceiveStream[SessionMessage]
|
45
46
|
|
46
47
|
read_stream_writer, read_stream = anyio.create_memory_object_stream(0)
|
47
48
|
write_stream, write_stream_reader = anyio.create_memory_object_stream(0)
|
@@ -64,18 +65,20 @@ async def sandbox_client( # type: ignore
|
|
64
65
|
async with write_stream_reader:
|
65
66
|
# This reads messages until the stream is closed
|
66
67
|
async for message in write_stream_reader:
|
67
|
-
root = message.root
|
68
|
+
root = message.message.root
|
68
69
|
if isinstance(root, JSONRPCRequest):
|
69
70
|
await read_stream_writer.send(
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
71
|
+
SessionMessage(
|
72
|
+
message=await exec_model_request(
|
73
|
+
sandbox=sandbox_environment,
|
74
|
+
method="mcp_send_request",
|
75
|
+
params={
|
76
|
+
"session_id": session_id,
|
77
|
+
"request": root.model_dump(),
|
78
|
+
},
|
79
|
+
result_type=JSONRPCMessage,
|
80
|
+
timeout=timeout,
|
81
|
+
)
|
79
82
|
)
|
80
83
|
)
|
81
84
|
elif isinstance(root, JSONRPCNotification):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: inspect_ai
|
3
|
-
Version: 0.3.
|
3
|
+
Version: 0.3.97
|
4
4
|
Summary: Framework for large language model evaluations
|
5
5
|
Author: UK AI Security Institute
|
6
6
|
License: MIT License
|
@@ -23,7 +23,7 @@ License-File: LICENSE
|
|
23
23
|
Requires-Dist: aiohttp>=3.9.0
|
24
24
|
Requires-Dist: anyio>=4.8.0
|
25
25
|
Requires-Dist: beautifulsoup4
|
26
|
-
Requires-Dist: click
|
26
|
+
Requires-Dist: click<8.2.0,>=8.1.3
|
27
27
|
Requires-Dist: debugpy
|
28
28
|
Requires-Dist: docstring-parser>=0.16
|
29
29
|
Requires-Dist: exceptiongroup>=1.0.2; python_version < "3.11"
|