inspect-ai 0.3.81__py3-none-any.whl → 0.3.82__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +35 -2
- inspect_ai/_cli/util.py +44 -1
- inspect_ai/_display/core/config.py +1 -1
- inspect_ai/_display/core/display.py +13 -4
- inspect_ai/_display/core/results.py +1 -1
- inspect_ai/_display/textual/widgets/task_detail.py +5 -4
- inspect_ai/_eval/eval.py +38 -1
- inspect_ai/_eval/evalset.py +5 -0
- inspect_ai/_eval/run.py +5 -2
- inspect_ai/_eval/task/log.py +53 -6
- inspect_ai/_eval/task/run.py +51 -10
- inspect_ai/_util/constants.py +2 -0
- inspect_ai/_util/file.py +17 -1
- inspect_ai/_util/json.py +36 -1
- inspect_ai/_view/server.py +113 -1
- inspect_ai/_view/www/App.css +1 -1
- inspect_ai/_view/www/dist/assets/index.css +518 -296
- inspect_ai/_view/www/dist/assets/index.js +38803 -36307
- inspect_ai/_view/www/eslint.config.mjs +1 -1
- inspect_ai/_view/www/log-schema.json +13 -0
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
- inspect_ai/_view/www/package.json +8 -2
- inspect_ai/_view/www/src/App.tsx +151 -855
- inspect_ai/_view/www/src/api/api-browser.ts +176 -5
- inspect_ai/_view/www/src/api/api-vscode.ts +75 -1
- inspect_ai/_view/www/src/api/client-api.ts +66 -10
- inspect_ai/_view/www/src/api/jsonrpc.ts +2 -0
- inspect_ai/_view/www/src/api/types.ts +107 -2
- inspect_ai/_view/www/src/appearance/icons.ts +1 -0
- inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +3 -3
- inspect_ai/_view/www/src/components/DownloadPanel.tsx +2 -2
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +56 -61
- inspect_ai/_view/www/src/components/FindBand.tsx +17 -9
- inspect_ai/_view/www/src/components/HumanBaselineView.tsx +1 -1
- inspect_ai/_view/www/src/components/JsonPanel.tsx +14 -24
- inspect_ai/_view/www/src/components/LargeModal.tsx +2 -35
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +27 -11
- inspect_ai/_view/www/src/components/LiveVirtualList.module.css +11 -0
- inspect_ai/_view/www/src/components/LiveVirtualList.tsx +177 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +3 -3
- inspect_ai/_view/www/src/components/MessageBand.tsx +14 -9
- inspect_ai/_view/www/src/components/MorePopOver.tsx +3 -3
- inspect_ai/_view/www/src/components/NavPills.tsx +20 -8
- inspect_ai/_view/www/src/components/NoContentsPanel.module.css +12 -0
- inspect_ai/_view/www/src/components/NoContentsPanel.tsx +20 -0
- inspect_ai/_view/www/src/components/ProgressBar.module.css +5 -4
- inspect_ai/_view/www/src/components/ProgressBar.tsx +3 -2
- inspect_ai/_view/www/src/components/PulsingDots.module.css +81 -0
- inspect_ai/_view/www/src/components/PulsingDots.tsx +45 -0
- inspect_ai/_view/www/src/components/TabSet.tsx +4 -37
- inspect_ai/_view/www/src/components/ToolButton.tsx +3 -4
- inspect_ai/_view/www/src/index.tsx +26 -94
- inspect_ai/_view/www/src/logfile/remoteLogFile.ts +9 -1
- inspect_ai/_view/www/src/logfile/remoteZipFile.ts +30 -4
- inspect_ai/_view/www/src/metadata/RenderedContent.tsx +4 -6
- inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +1 -1
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +9 -1
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +67 -28
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +51 -22
- inspect_ai/_view/www/src/samples/SampleDisplay.module.css +4 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +144 -90
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +4 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +82 -35
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +23 -30
- inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +1 -1
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +45 -53
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +4 -1
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +3 -0
- inspect_ai/_view/www/src/samples/chat/messages.ts +34 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +3 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +10 -1
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +22 -46
- inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +25 -17
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +2 -1
- inspect_ai/_view/www/src/samples/descriptor/types.ts +6 -5
- inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +21 -3
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +20 -1
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +105 -85
- inspect_ai/_view/www/src/samples/list/SampleRow.module.css +6 -0
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +27 -14
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +29 -18
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +28 -28
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +19 -9
- inspect_ai/_view/www/src/samples/sampleDataAdapter.ts +33 -0
- inspect_ai/_view/www/src/samples/sampleLimit.ts +2 -2
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +7 -9
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +7 -11
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +4 -0
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +10 -24
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -22
- inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +15 -24
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +6 -28
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +24 -34
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +4 -0
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +8 -13
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +197 -338
- inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.module.css +16 -0
- inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.tsx +44 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +7 -4
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +52 -58
- inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.module.css +23 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.tsx +27 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +30 -1
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +102 -72
- inspect_ai/_view/www/src/scoring/utils.ts +87 -0
- inspect_ai/_view/www/src/state/appSlice.ts +244 -0
- inspect_ai/_view/www/src/state/hooks.ts +397 -0
- inspect_ai/_view/www/src/state/logPolling.ts +196 -0
- inspect_ai/_view/www/src/state/logSlice.ts +214 -0
- inspect_ai/_view/www/src/state/logsPolling.ts +118 -0
- inspect_ai/_view/www/src/state/logsSlice.ts +181 -0
- inspect_ai/_view/www/src/state/samplePolling.ts +311 -0
- inspect_ai/_view/www/src/state/sampleSlice.ts +127 -0
- inspect_ai/_view/www/src/state/sampleUtils.ts +21 -0
- inspect_ai/_view/www/src/state/scrolling.ts +206 -0
- inspect_ai/_view/www/src/state/store.ts +168 -0
- inspect_ai/_view/www/src/state/store_filter.ts +84 -0
- inspect_ai/_view/www/src/state/utils.ts +23 -0
- inspect_ai/_view/www/src/storage/index.ts +26 -0
- inspect_ai/_view/www/src/types/log.d.ts +2 -0
- inspect_ai/_view/www/src/types.ts +94 -32
- inspect_ai/_view/www/src/utils/attachments.ts +58 -23
- inspect_ai/_view/www/src/utils/logger.ts +52 -0
- inspect_ai/_view/www/src/utils/polling.ts +100 -0
- inspect_ai/_view/www/src/utils/react.ts +30 -0
- inspect_ai/_view/www/src/utils/vscode.ts +1 -1
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +181 -216
- inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +11 -53
- inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +8 -18
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -0
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +40 -22
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +0 -1
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +98 -39
- inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.module.css +32 -0
- inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.tsx +32 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +11 -13
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +6 -2
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +4 -4
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +28 -13
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +5 -10
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +4 -4
- inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.module.css +22 -0
- inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.tsx +19 -0
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +110 -115
- inspect_ai/_view/www/src/workspace/tabs/grouping.ts +37 -5
- inspect_ai/_view/www/src/workspace/tabs/types.ts +4 -0
- inspect_ai/_view/www/src/workspace/types.ts +4 -3
- inspect_ai/_view/www/src/workspace/utils.ts +4 -4
- inspect_ai/_view/www/vite.config.js +6 -0
- inspect_ai/_view/www/yarn.lock +370 -354
- inspect_ai/log/_condense.py +26 -0
- inspect_ai/log/_log.py +6 -3
- inspect_ai/log/_recorders/buffer/__init__.py +14 -0
- inspect_ai/log/_recorders/buffer/buffer.py +30 -0
- inspect_ai/log/_recorders/buffer/database.py +685 -0
- inspect_ai/log/_recorders/buffer/filestore.py +259 -0
- inspect_ai/log/_recorders/buffer/types.py +84 -0
- inspect_ai/log/_recorders/eval.py +2 -11
- inspect_ai/log/_recorders/types.py +30 -0
- inspect_ai/log/_transcript.py +27 -1
- inspect_ai/model/_call_tools.py +1 -0
- inspect_ai/model/_generate_config.py +2 -2
- inspect_ai/model/_model.py +1 -0
- inspect_ai/tool/_tool_support_helpers.py +4 -4
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +3 -1
- inspect_ai/util/_subtask.py +1 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/RECORD +178 -138
- inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +0 -22
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/top_level.txt +0 -0
inspect_ai/log/_condense.py
CHANGED
@@ -25,6 +25,7 @@ from inspect_ai.model._model_output import ModelOutput
 from ._log import EvalSample
 from ._transcript import (
     Event,
+    InfoEvent,
     ModelEvent,
     SampleInitEvent,
     StateEvent,
@@ -133,6 +134,25 @@ def resolve_sample_attachments(sample: EvalSample) -> EvalSample:
     )
 
 
+def attachments_content_fn(
+    log_images: bool, max_length: int, attachments: dict[str, str]
+) -> Callable[[str], str]:
+    def create_attachment(text: str) -> str:
+        hash = mm3_hash(text)
+        attachments[hash] = text
+        return f"{ATTACHMENT_PROTOCOL}{hash}"
+
+    def content_fn(text: str) -> str:
+        if not log_images and is_data_uri(text):
+            return BASE_64_DATA_REMOVED
+        elif len(text) > max_length:
+            return create_attachment(text)
+        else:
+            return text
+
+    return content_fn
+
+
 def walk_events(events: list[Event], content_fn: Callable[[str], str]) -> list[Event]:
     return [walk_event(event, content_fn) for event in events]
 
@@ -150,6 +170,8 @@ def walk_event(event: Event, content_fn: Callable[[str], str]) -> Event:
         return walk_subtask_event(event, content_fn)
     elif isinstance(event, ToolEvent):
         return walk_tool_event(event, content_fn)
+    elif isinstance(event, InfoEvent):
+        return walk_info_event(event, content_fn)
     else:
         return event
 
@@ -164,6 +186,10 @@ def walk_tool_event(event: ToolEvent, content_fn: Callable[[str], str]) -> ToolE
     return event.model_copy(update=dict(events=walk_events(event.events, content_fn)))
 
 
+def walk_info_event(event: InfoEvent, content_fn: Callable[[str], str]) -> InfoEvent:
+    return event.model_copy(update=dict(data=walk_json_value(event.data, content_fn)))
+
+
 def walk_sample_init_event(
     event: SampleInitEvent, content_fn: Callable[[str], str]
 ) -> SampleInitEvent:
inspect_ai/log/_log.py
CHANGED
@@ -121,6 +121,9 @@ class EvalConfig(BaseModel):
     log_buffer: int | None = Field(default=None)
     """Number of samples to buffer before writing log file."""
 
+    log_shared: int | None = Field(default=None)
+    """Interval (in seconds) for syncing sample events to log directory."""
+
     score_display: bool | None = Field(default=None)
     """Display scoring metrics realtime."""
 
@@ -180,16 +183,16 @@ class EvalSample(BaseModel):
     setup: str | None = Field(default=None)
     """Setup script to run for sample (run within default SandboxEnvironment)."""
 
-    messages: list[ChatMessage]
+    messages: list[ChatMessage] = Field(default_factory=list)
     """Chat conversation history for sample."""
 
-    output: ModelOutput
+    output: ModelOutput = Field(default_factory=ModelOutput)
     """Model output from sample."""
 
     scores: dict[str, Score] | None = Field(default=None)
     """Scores for sample."""
 
-    metadata: dict[str, Any]
+    metadata: dict[str, Any] = Field(default_factory=dict)
     """Additional sample metadata."""
 
     def metadata_as(self, metadata_cls: Type[MT]) -> MT:
inspect_ai/log/_recorders/buffer/__init__.py
ADDED
@@ -0,0 +1,14 @@
+from .buffer import cleanup_sample_buffers, sample_buffer
+from .database import SampleBufferDatabase
+from .types import AttachmentData, EventData, SampleBuffer, SampleData, Samples
+
+__all__ = [
+    "AttachmentData",
+    "EventData",
+    "SampleData",
+    "Samples",
+    "SampleBuffer",
+    "SampleBufferDatabase",
+    "sample_buffer",
+    "cleanup_sample_buffers",
+]
inspect_ai/log/_recorders/buffer/buffer.py
ADDED
@@ -0,0 +1,30 @@
+from logging import getLogger
+
+from .database import SampleBufferDatabase, cleanup_sample_buffer_databases
+from .filestore import SampleBufferFilestore, cleanup_sample_buffer_filestores
+from .types import SampleBuffer
+
+logger = getLogger(__name__)
+
+
+def sample_buffer(location: str) -> SampleBuffer:
+    try:
+        return SampleBufferDatabase(location, create=False)
+    except FileNotFoundError:
+        return SampleBufferFilestore(location, create=False)
+
+
+def running_tasks(log_dir: str) -> list[str]:
+    tasks = SampleBufferDatabase.running_tasks(log_dir)
+    if tasks is not None:
+        return tasks
+    else:
+        return SampleBufferFilestore.running_tasks(log_dir) or []
+
+
+def cleanup_sample_buffers(log_dir: str) -> None:
+    try:
+        cleanup_sample_buffer_databases()
+        cleanup_sample_buffer_filestores(log_dir)
+    except Exception as ex:
+        logger.warning(f"Unexpected error cleaning up sample buffers: {ex}")