inspect-ai 0.3.81__py3-none-any.whl → 0.3.83__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/__init__.py +2 -1
- inspect_ai/_cli/eval.py +35 -2
- inspect_ai/_cli/util.py +44 -1
- inspect_ai/_display/core/config.py +1 -1
- inspect_ai/_display/core/display.py +13 -4
- inspect_ai/_display/core/results.py +1 -1
- inspect_ai/_display/textual/app.py +14 -3
- inspect_ai/_display/textual/display.py +4 -0
- inspect_ai/_display/textual/widgets/samples.py +9 -3
- inspect_ai/_display/textual/widgets/task_detail.py +8 -8
- inspect_ai/_display/textual/widgets/tasks.py +17 -1
- inspect_ai/_display/textual/widgets/vscode.py +44 -0
- inspect_ai/_eval/eval.py +74 -25
- inspect_ai/_eval/evalset.py +22 -18
- inspect_ai/_eval/loader.py +34 -11
- inspect_ai/_eval/run.py +13 -15
- inspect_ai/_eval/score.py +13 -3
- inspect_ai/_eval/task/generate.py +8 -9
- inspect_ai/_eval/task/log.py +55 -6
- inspect_ai/_eval/task/run.py +51 -10
- inspect_ai/_eval/task/task.py +23 -9
- inspect_ai/_util/constants.py +2 -0
- inspect_ai/_util/file.py +30 -1
- inspect_ai/_util/json.py +37 -1
- inspect_ai/_util/registry.py +1 -0
- inspect_ai/_util/vscode.py +37 -0
- inspect_ai/_view/server.py +113 -1
- inspect_ai/_view/www/App.css +7 -1
- inspect_ai/_view/www/dist/assets/index.css +813 -415
- inspect_ai/_view/www/dist/assets/index.js +54475 -32003
- inspect_ai/_view/www/eslint.config.mjs +1 -1
- inspect_ai/_view/www/log-schema.json +137 -31
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
- inspect_ai/_view/www/package.json +11 -2
- inspect_ai/_view/www/src/App.tsx +161 -853
- inspect_ai/_view/www/src/api/api-browser.ts +176 -5
- inspect_ai/_view/www/src/api/api-vscode.ts +75 -1
- inspect_ai/_view/www/src/api/client-api.ts +66 -10
- inspect_ai/_view/www/src/api/jsonrpc.ts +2 -0
- inspect_ai/_view/www/src/api/types.ts +107 -2
- inspect_ai/_view/www/src/appearance/icons.ts +2 -0
- inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +3 -3
- inspect_ai/_view/www/src/components/Card.tsx +6 -4
- inspect_ai/_view/www/src/components/DownloadPanel.tsx +2 -2
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +56 -61
- inspect_ai/_view/www/src/components/FindBand.tsx +17 -9
- inspect_ai/_view/www/src/components/HumanBaselineView.tsx +1 -1
- inspect_ai/_view/www/src/components/JsonPanel.tsx +14 -24
- inspect_ai/_view/www/src/components/LargeModal.tsx +2 -35
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +27 -11
- inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
- inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
- inspect_ai/_view/www/src/components/LiveVirtualList.module.css +11 -0
- inspect_ai/_view/www/src/components/LiveVirtualList.tsx +177 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +116 -26
- inspect_ai/_view/www/src/components/MessageBand.tsx +14 -9
- inspect_ai/_view/www/src/components/Modal.module.css +38 -0
- inspect_ai/_view/www/src/components/Modal.tsx +77 -0
- inspect_ai/_view/www/src/components/MorePopOver.tsx +3 -3
- inspect_ai/_view/www/src/components/NavPills.tsx +20 -8
- inspect_ai/_view/www/src/components/NoContentsPanel.module.css +12 -0
- inspect_ai/_view/www/src/components/NoContentsPanel.tsx +20 -0
- inspect_ai/_view/www/src/components/ProgressBar.module.css +5 -4
- inspect_ai/_view/www/src/components/ProgressBar.tsx +3 -2
- inspect_ai/_view/www/src/components/PulsingDots.module.css +81 -0
- inspect_ai/_view/www/src/components/PulsingDots.tsx +45 -0
- inspect_ai/_view/www/src/components/TabSet.tsx +4 -37
- inspect_ai/_view/www/src/components/ToolButton.tsx +3 -4
- inspect_ai/_view/www/src/index.tsx +26 -94
- inspect_ai/_view/www/src/logfile/remoteLogFile.ts +9 -1
- inspect_ai/_view/www/src/logfile/remoteZipFile.ts +30 -4
- inspect_ai/_view/www/src/metadata/RenderedContent.tsx +4 -6
- inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
- inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
- inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +1 -1
- inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +9 -1
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +74 -28
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +58 -22
- inspect_ai/_view/www/src/samples/SampleDisplay.module.css +4 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +135 -104
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +10 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +83 -36
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +35 -30
- inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +1 -1
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +45 -53
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +6 -1
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +5 -0
- inspect_ai/_view/www/src/samples/chat/messages.ts +36 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +3 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +11 -1
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +22 -46
- inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +34 -20
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -10
- inspect_ai/_view/www/src/samples/descriptor/types.ts +6 -5
- inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +22 -3
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +27 -2
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +122 -85
- inspect_ai/_view/www/src/samples/list/SampleRow.module.css +6 -0
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +28 -15
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +29 -18
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +28 -28
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +19 -9
- inspect_ai/_view/www/src/samples/sampleDataAdapter.ts +33 -0
- inspect_ai/_view/www/src/samples/sampleLimit.ts +2 -2
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +12 -27
- inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
- inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
- inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
- inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +4 -0
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +10 -24
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -22
- inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +15 -24
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +6 -28
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +24 -34
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +4 -0
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +33 -17
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +197 -338
- inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.module.css +16 -0
- inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.tsx +44 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +7 -4
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +81 -60
- inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.module.css +23 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.tsx +27 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +29 -1
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +102 -72
- inspect_ai/_view/www/src/scoring/utils.ts +87 -0
- inspect_ai/_view/www/src/state/appSlice.ts +244 -0
- inspect_ai/_view/www/src/state/hooks.ts +399 -0
- inspect_ai/_view/www/src/state/logPolling.ts +200 -0
- inspect_ai/_view/www/src/state/logSlice.ts +224 -0
- inspect_ai/_view/www/src/state/logsPolling.ts +118 -0
- inspect_ai/_view/www/src/state/logsSlice.ts +181 -0
- inspect_ai/_view/www/src/state/samplePolling.ts +314 -0
- inspect_ai/_view/www/src/state/sampleSlice.ts +140 -0
- inspect_ai/_view/www/src/state/sampleUtils.ts +21 -0
- inspect_ai/_view/www/src/state/scrolling.ts +206 -0
- inspect_ai/_view/www/src/state/store.ts +168 -0
- inspect_ai/_view/www/src/state/store_filter.ts +84 -0
- inspect_ai/_view/www/src/state/utils.ts +23 -0
- inspect_ai/_view/www/src/storage/index.ts +26 -0
- inspect_ai/_view/www/src/types/log.d.ts +36 -26
- inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
- inspect_ai/_view/www/src/types.ts +94 -32
- inspect_ai/_view/www/src/utils/attachments.ts +58 -23
- inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
- inspect_ai/_view/www/src/utils/logger.ts +52 -0
- inspect_ai/_view/www/src/utils/polling.ts +100 -0
- inspect_ai/_view/www/src/utils/react.ts +30 -0
- inspect_ai/_view/www/src/utils/vscode.ts +1 -1
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +184 -217
- inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +11 -53
- inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +8 -18
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -0
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +40 -22
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -1
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +159 -103
- inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.module.css +32 -0
- inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.tsx +32 -0
- inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
- inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +12 -14
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +6 -2
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +4 -4
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +28 -13
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +5 -10
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +4 -4
- inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.module.css +22 -0
- inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.tsx +19 -0
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +128 -115
- inspect_ai/_view/www/src/workspace/tabs/grouping.ts +37 -5
- inspect_ai/_view/www/src/workspace/tabs/types.ts +4 -0
- inspect_ai/_view/www/src/workspace/types.ts +4 -3
- inspect_ai/_view/www/src/workspace/utils.ts +4 -4
- inspect_ai/_view/www/vite.config.js +6 -0
- inspect_ai/_view/www/yarn.lock +464 -355
- inspect_ai/agent/__init__.py +36 -0
- inspect_ai/agent/_agent.py +268 -0
- inspect_ai/agent/_as_solver.py +72 -0
- inspect_ai/agent/_as_tool.py +122 -0
- inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
- inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
- inspect_ai/agent/_filter.py +46 -0
- inspect_ai/agent/_handoff.py +93 -0
- inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
- inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
- inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
- inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
- inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
- inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
- inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
- inspect_ai/agent/_react.py +241 -0
- inspect_ai/agent/_run.py +36 -0
- inspect_ai/agent/_types.py +81 -0
- inspect_ai/log/_condense.py +26 -0
- inspect_ai/log/_log.py +17 -5
- inspect_ai/log/_recorders/buffer/__init__.py +14 -0
- inspect_ai/log/_recorders/buffer/buffer.py +30 -0
- inspect_ai/log/_recorders/buffer/database.py +685 -0
- inspect_ai/log/_recorders/buffer/filestore.py +259 -0
- inspect_ai/log/_recorders/buffer/types.py +84 -0
- inspect_ai/log/_recorders/eval.py +2 -11
- inspect_ai/log/_recorders/types.py +30 -0
- inspect_ai/log/_transcript.py +32 -2
- inspect_ai/model/__init__.py +7 -1
- inspect_ai/model/_call_tools.py +257 -52
- inspect_ai/model/_chat_message.py +7 -4
- inspect_ai/model/_conversation.py +13 -62
- inspect_ai/model/_display.py +85 -0
- inspect_ai/model/_generate_config.py +2 -2
- inspect_ai/model/_model.py +114 -14
- inspect_ai/model/_model_output.py +14 -9
- inspect_ai/model/_openai.py +16 -4
- inspect_ai/model/_openai_computer_use.py +162 -0
- inspect_ai/model/_openai_responses.py +319 -165
- inspect_ai/model/_providers/anthropic.py +20 -21
- inspect_ai/model/_providers/azureai.py +24 -13
- inspect_ai/model/_providers/bedrock.py +1 -7
- inspect_ai/model/_providers/cloudflare.py +3 -3
- inspect_ai/model/_providers/goodfire.py +2 -6
- inspect_ai/model/_providers/google.py +11 -10
- inspect_ai/model/_providers/groq.py +6 -3
- inspect_ai/model/_providers/hf.py +7 -3
- inspect_ai/model/_providers/mistral.py +7 -10
- inspect_ai/model/_providers/openai.py +47 -17
- inspect_ai/model/_providers/openai_o1.py +11 -4
- inspect_ai/model/_providers/openai_responses.py +12 -14
- inspect_ai/model/_providers/providers.py +2 -2
- inspect_ai/model/_providers/together.py +12 -2
- inspect_ai/model/_providers/util/chatapi.py +7 -2
- inspect_ai/model/_providers/util/hf_handler.py +4 -2
- inspect_ai/model/_providers/util/llama31.py +4 -2
- inspect_ai/model/_providers/vertex.py +11 -9
- inspect_ai/model/_providers/vllm.py +4 -4
- inspect_ai/scorer/__init__.py +2 -0
- inspect_ai/scorer/_metrics/__init__.py +2 -0
- inspect_ai/scorer/_metrics/grouped.py +84 -0
- inspect_ai/scorer/_score.py +26 -6
- inspect_ai/solver/__init__.py +2 -2
- inspect_ai/solver/_basic_agent.py +22 -9
- inspect_ai/solver/_bridge.py +31 -0
- inspect_ai/solver/_chain.py +20 -12
- inspect_ai/solver/_fork.py +5 -1
- inspect_ai/solver/_human_agent.py +52 -0
- inspect_ai/solver/_prompt.py +3 -1
- inspect_ai/solver/_run.py +59 -0
- inspect_ai/solver/_solver.py +14 -4
- inspect_ai/solver/_task_state.py +5 -3
- inspect_ai/tool/_tool_call.py +15 -8
- inspect_ai/tool/_tool_def.py +17 -12
- inspect_ai/tool/_tool_support_helpers.py +4 -4
- inspect_ai/tool/_tool_with.py +14 -11
- inspect_ai/tool/_tools/_bash_session.py +11 -2
- inspect_ai/tool/_tools/_computer/_common.py +18 -2
- inspect_ai/tool/_tools/_computer/_computer.py +18 -2
- inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
- inspect_ai/tool/_tools/_think.py +1 -1
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +103 -62
- inspect_ai/util/__init__.py +2 -0
- inspect_ai/util/_anyio.py +27 -0
- inspect_ai/util/_sandbox/__init__.py +2 -1
- inspect_ai/util/_sandbox/context.py +32 -7
- inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
- inspect_ai/util/_sandbox/docker/compose.py +2 -2
- inspect_ai/util/_sandbox/docker/docker.py +12 -1
- inspect_ai/util/_store_model.py +30 -7
- inspect_ai/util/_subprocess.py +13 -3
- inspect_ai/util/_subtask.py +1 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/RECORD +295 -229
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -169
- inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +0 -22
- /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/top_level.txt +0 -0
@@ -1,27 +1,22 @@
|
|
1
1
|
import { ApplicationIcons } from "../appearance/icons";
|
2
2
|
import { LargeModal, ModalTool, ModalTools } from "../components/LargeModal";
|
3
3
|
|
4
|
-
import { FC, Ref,
|
4
|
+
import { FC, Ref, useCallback, useEffect, useMemo, useRef } from "react";
|
5
5
|
import { ErrorPanel } from "../components/ErrorPanel";
|
6
|
-
import {
|
6
|
+
import { useLogSelection, usePrevious, useSampleData } from "../state/hooks";
|
7
|
+
import { useStatefulScrollPosition } from "../state/scrolling";
|
8
|
+
import { useStore } from "../state/store";
|
7
9
|
import { SampleDisplay } from "./SampleDisplay";
|
8
|
-
import { SamplesDescriptor } from "./descriptor/samplesDescriptor";
|
9
10
|
|
10
11
|
interface SampleDialogProps {
|
11
12
|
id: string;
|
12
13
|
title: string;
|
13
|
-
sampleStatus: string;
|
14
|
-
sampleError?: Error;
|
15
|
-
sample?: EvalSample;
|
16
|
-
sampleDescriptor: SamplesDescriptor;
|
17
14
|
selectedTab?: string;
|
18
15
|
setSelectedTab: (tab: string) => void;
|
19
16
|
showingSampleDialog: boolean;
|
20
17
|
setShowingSampleDialog: (showing: boolean) => void;
|
21
18
|
nextSample: () => void;
|
22
19
|
prevSample: () => void;
|
23
|
-
sampleScrollPositionRef: RefObject<number>;
|
24
|
-
setSampleScrollPosition: (position: number) => void;
|
25
20
|
}
|
26
21
|
|
27
22
|
/**
|
@@ -30,21 +25,62 @@ interface SampleDialogProps {
|
|
30
25
|
export const SampleDialog: FC<SampleDialogProps> = ({
|
31
26
|
id,
|
32
27
|
title,
|
33
|
-
sample,
|
34
|
-
sampleDescriptor,
|
35
28
|
nextSample,
|
36
29
|
prevSample,
|
37
|
-
sampleStatus,
|
38
|
-
sampleError,
|
39
30
|
showingSampleDialog,
|
40
31
|
setShowingSampleDialog,
|
41
32
|
selectedTab,
|
42
33
|
setSelectedTab,
|
43
|
-
sampleScrollPositionRef,
|
44
|
-
setSampleScrollPosition,
|
45
34
|
}) => {
|
35
|
+
// Scroll reference (attach stateful tracking)
|
46
36
|
const scrollRef: Ref<HTMLDivElement> = useRef(null);
|
37
|
+
useStatefulScrollPosition(scrollRef, "sample-dialog");
|
47
38
|
|
39
|
+
// Sample hooks
|
40
|
+
const sampleData = useSampleData();
|
41
|
+
const loadSample = useStore((state) => state.sampleActions.loadSample);
|
42
|
+
const pollSample = useStore((state) => state.sampleActions.pollSample);
|
43
|
+
const logSelection = useLogSelection();
|
44
|
+
|
45
|
+
useEffect(() => {
|
46
|
+
if (sampleData.running && logSelection.logFile && logSelection.sample) {
|
47
|
+
pollSample(logSelection.logFile, logSelection.sample);
|
48
|
+
}
|
49
|
+
}, []);
|
50
|
+
|
51
|
+
// Load sample
|
52
|
+
const prevCompleted = usePrevious(
|
53
|
+
logSelection.sample?.completed !== undefined
|
54
|
+
? logSelection.sample.completed
|
55
|
+
: true,
|
56
|
+
);
|
57
|
+
const prevLogFile = usePrevious<string | undefined>(logSelection.logFile);
|
58
|
+
useEffect(() => {
|
59
|
+
if (logSelection.logFile && logSelection.sample) {
|
60
|
+
const currentSampleCompleted =
|
61
|
+
logSelection.sample.completed !== undefined
|
62
|
+
? logSelection.sample.completed
|
63
|
+
: true;
|
64
|
+
|
65
|
+
if (
|
66
|
+
prevLogFile !== logSelection.logFile ||
|
67
|
+
sampleData.sample?.id !== logSelection.sample.id ||
|
68
|
+
sampleData.sample?.epoch !== logSelection.sample.epoch ||
|
69
|
+
currentSampleCompleted !== prevCompleted
|
70
|
+
) {
|
71
|
+
loadSample(logSelection.logFile, logSelection.sample);
|
72
|
+
}
|
73
|
+
}
|
74
|
+
}, [
|
75
|
+
logSelection.logFile,
|
76
|
+
logSelection.sample?.id,
|
77
|
+
logSelection.sample?.epoch,
|
78
|
+
logSelection.sample?.completed,
|
79
|
+
sampleData.sample?.id,
|
80
|
+
sampleData.sample?.epoch,
|
81
|
+
]);
|
82
|
+
|
83
|
+
// Tools
|
48
84
|
const tools = useMemo<ModalTools>(() => {
|
49
85
|
const nextTool: ModalTool = {
|
50
86
|
label: "Next Sample",
|
@@ -100,18 +136,18 @@ export const SampleDialog: FC<SampleDialogProps> = ({
|
|
100
136
|
onkeyup={handleKeyUp}
|
101
137
|
visible={showingSampleDialog}
|
102
138
|
onHide={onHide}
|
103
|
-
showProgress={
|
104
|
-
|
105
|
-
|
139
|
+
showProgress={
|
140
|
+
sampleData.status === "loading" || sampleData.status === "streaming"
|
141
|
+
}
|
106
142
|
scrollRef={scrollRef}
|
107
143
|
>
|
108
|
-
{
|
109
|
-
<ErrorPanel title="Sample Error" error={
|
144
|
+
{sampleData.error ? (
|
145
|
+
<ErrorPanel title="Sample Error" error={sampleData.error} />
|
110
146
|
) : (
|
111
147
|
<SampleDisplay
|
112
148
|
id={id}
|
113
|
-
sample={sample}
|
114
|
-
|
149
|
+
sample={sampleData.sample}
|
150
|
+
runningEvents={sampleData.running}
|
115
151
|
selectedTab={selectedTab}
|
116
152
|
setSelectedTab={setSelectedTab}
|
117
153
|
scrollRef={scrollRef}
|
@@ -7,13 +7,20 @@ import { isVscode } from "../utils/vscode";
|
|
7
7
|
import { ApplicationIcons } from "../appearance/icons";
|
8
8
|
import { ANSIDisplay } from "../components/AnsiDisplay";
|
9
9
|
import { ToolButton } from "../components/ToolButton";
|
10
|
-
import { SampleScoreView } from "./scores/SampleScoreView";
|
11
10
|
|
12
11
|
import clsx from "clsx";
|
13
|
-
import {
|
12
|
+
import {
|
13
|
+
FC,
|
14
|
+
Fragment,
|
15
|
+
MouseEvent,
|
16
|
+
RefObject,
|
17
|
+
useCallback,
|
18
|
+
useMemo,
|
19
|
+
} from "react";
|
20
|
+
import { SampleSummary } from "../api/types";
|
14
21
|
import { Card, CardBody, CardHeader } from "../components/Card";
|
15
|
-
import { EmptyPanel } from "../components/EmptyPanel";
|
16
22
|
import { JSONPanel } from "../components/JsonPanel";
|
23
|
+
import { NoContentsPanel } from "../components/NoContentsPanel";
|
17
24
|
import {
|
18
25
|
kSampleErrorTabId,
|
19
26
|
kSampleJsonTabId,
|
@@ -22,23 +29,26 @@ import {
|
|
22
29
|
kSampleScoringTabId,
|
23
30
|
kSampleTranscriptTabId,
|
24
31
|
} from "../constants";
|
25
|
-
import {
|
32
|
+
import { useSampleSummaries } from "../state/hooks";
|
33
|
+
import { useStore } from "../state/store";
|
34
|
+
import { EvalSample, Events } from "../types/log";
|
26
35
|
import { ModelTokenTable } from "../usage/ModelTokenTable";
|
27
36
|
import { formatTime } from "../utils/format";
|
28
37
|
import { printHeadingHtml, printHtml } from "../utils/print";
|
29
38
|
import { ChatViewVirtualList } from "./chat/ChatViewVirtualList";
|
30
|
-
import {
|
39
|
+
import { messagesFromEvents } from "./chat/messages";
|
31
40
|
import styles from "./SampleDisplay.module.css";
|
32
41
|
import { SampleSummaryView } from "./SampleSummaryView";
|
33
|
-
import {
|
42
|
+
import { SampleScoresView } from "./scores/SampleScoresView";
|
43
|
+
import { TranscriptVirtualList } from "./transcript/TranscriptView";
|
34
44
|
|
35
45
|
interface SampleDisplayProps {
|
36
46
|
id: string;
|
37
47
|
sample?: EvalSample;
|
38
|
-
sampleDescriptor: SamplesDescriptor;
|
39
48
|
selectedTab?: string;
|
40
49
|
setSelectedTab: (tab: string) => void;
|
41
50
|
scrollRef: RefObject<HTMLDivElement | null>;
|
51
|
+
runningEvents?: Events;
|
42
52
|
}
|
43
53
|
|
44
54
|
/**
|
@@ -47,18 +57,32 @@ interface SampleDisplayProps {
|
|
47
57
|
export const SampleDisplay: FC<SampleDisplayProps> = ({
|
48
58
|
id,
|
49
59
|
sample,
|
50
|
-
sampleDescriptor,
|
51
60
|
selectedTab,
|
52
61
|
setSelectedTab,
|
53
62
|
scrollRef,
|
63
|
+
runningEvents: runningSampleData,
|
54
64
|
}) => {
|
55
65
|
// Tab ids
|
56
66
|
const baseId = `sample-dialog`;
|
67
|
+
const sampleSummaries = useSampleSummaries();
|
68
|
+
const selectedSampleIndex = useStore(
|
69
|
+
(state) => state.log.selectedSampleIndex,
|
70
|
+
);
|
57
71
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
72
|
+
const sampleSummary = sampleSummaries[selectedSampleIndex];
|
73
|
+
|
74
|
+
// Consolidate the events and messages into the proper list
|
75
|
+
// whether running or not
|
76
|
+
const sampleEvents = sample?.events || runningSampleData;
|
77
|
+
const sampleMessages = useMemo(() => {
|
78
|
+
if (sample?.messages) {
|
79
|
+
return sample.messages;
|
80
|
+
} else if (runningSampleData) {
|
81
|
+
return messagesFromEvents(runningSampleData);
|
82
|
+
} else {
|
83
|
+
return [];
|
84
|
+
}
|
85
|
+
}, [sample?.messages, runningSampleData]);
|
62
86
|
|
63
87
|
// Tab selection
|
64
88
|
const onSelectedTab = (e: MouseEvent<HTMLElement>) => {
|
@@ -68,12 +92,15 @@ export const SampleDisplay: FC<SampleDisplayProps> = ({
|
|
68
92
|
return false;
|
69
93
|
};
|
70
94
|
|
71
|
-
const scorerNames = Object.keys(sample.scores || {});
|
72
95
|
const sampleMetadatas = metadataViewsForSample(`${baseId}-${id}`, sample);
|
73
96
|
|
74
97
|
const tabsetId = `task-sample-details-tab-${id}`;
|
75
98
|
const targetId = `${tabsetId}-content`;
|
76
99
|
|
100
|
+
const handlePrintClick = useCallback(() => {
|
101
|
+
printSample(id, targetId);
|
102
|
+
}, [printSample, id, targetId]);
|
103
|
+
|
77
104
|
const tools = [];
|
78
105
|
if (!isVscode()) {
|
79
106
|
tools.push(
|
@@ -81,51 +108,48 @@ export const SampleDisplay: FC<SampleDisplayProps> = ({
|
|
81
108
|
key="sample-print-tool"
|
82
109
|
label="Print"
|
83
110
|
icon={ApplicationIcons.copy}
|
84
|
-
onClick={
|
85
|
-
printSample(id, targetId);
|
86
|
-
}}
|
111
|
+
onClick={handlePrintClick}
|
87
112
|
/>,
|
88
113
|
);
|
89
114
|
}
|
90
115
|
|
116
|
+
// Is the sample running?
|
117
|
+
const running = isRunning(sampleSummary, runningSampleData);
|
118
|
+
|
91
119
|
return (
|
92
120
|
<Fragment>
|
93
|
-
|
94
|
-
parent_id={id}
|
95
|
-
|
96
|
-
sampleDescriptor={sampleDescriptor}
|
97
|
-
/>
|
121
|
+
{sample || sampleSummary ? (
|
122
|
+
<SampleSummaryView parent_id={id} sample={sample || sampleSummary} />
|
123
|
+
) : undefined}
|
98
124
|
<TabSet
|
99
125
|
id={tabsetId}
|
100
126
|
tabControlsClassName={clsx("text-size-base")}
|
101
127
|
tabPanelsClassName={clsx(styles.tabPanel)}
|
102
128
|
tools={tools}
|
103
129
|
>
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
</TabPanel>
|
124
|
-
) : null}
|
130
|
+
<TabPanel
|
131
|
+
key={kSampleTranscriptTabId}
|
132
|
+
id={kSampleTranscriptTabId}
|
133
|
+
className="sample-tab"
|
134
|
+
title="Transcript"
|
135
|
+
onSelected={onSelectedTab}
|
136
|
+
selected={
|
137
|
+
selectedTab === kSampleTranscriptTabId || selectedTab === undefined
|
138
|
+
}
|
139
|
+
scrollable={false}
|
140
|
+
>
|
141
|
+
<TranscriptVirtualList
|
142
|
+
key={`${baseId}-transcript-display-${id}`}
|
143
|
+
id={`${baseId}-transcript-display-${id}`}
|
144
|
+
events={sampleEvents || []}
|
145
|
+
running={running}
|
146
|
+
scrollRef={scrollRef}
|
147
|
+
/>
|
148
|
+
</TabPanel>
|
125
149
|
<TabPanel
|
126
150
|
key={kSampleMessagesTabId}
|
127
151
|
id={kSampleMessagesTabId}
|
128
|
-
className={clsx("sample-tab", styles.fullWidth)}
|
152
|
+
className={clsx("sample-tab", styles.fullWidth, styles.chat)}
|
129
153
|
title="Messages"
|
130
154
|
onSelected={onSelectedTab}
|
131
155
|
selected={selectedTab === kSampleMessagesTabId}
|
@@ -134,62 +158,37 @@ export const SampleDisplay: FC<SampleDisplayProps> = ({
|
|
134
158
|
<ChatViewVirtualList
|
135
159
|
key={`${baseId}-chat-${id}`}
|
136
160
|
id={`${baseId}-chat-${id}`}
|
137
|
-
messages={
|
161
|
+
messages={sampleMessages}
|
138
162
|
indented={true}
|
139
163
|
scrollRef={scrollRef}
|
140
164
|
toolCallStyle="complete"
|
165
|
+
running={running}
|
141
166
|
/>
|
142
167
|
</TabPanel>
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
const tabId = `score-${scorer}`;
|
162
|
-
return (
|
163
|
-
<TabPanel
|
164
|
-
key={tabId}
|
165
|
-
id={tabId}
|
166
|
-
className="sample-tab"
|
167
|
-
title={scorer}
|
168
|
-
onSelected={onSelectedTab}
|
169
|
-
selected={selectedTab === tabId}
|
170
|
-
>
|
171
|
-
<SampleScoreView
|
172
|
-
sample={sample}
|
173
|
-
sampleDescriptor={sampleDescriptor}
|
174
|
-
scorer={scorer}
|
175
|
-
/>
|
176
|
-
</TabPanel>
|
177
|
-
);
|
178
|
-
})}
|
179
|
-
</>
|
180
|
-
)}
|
181
|
-
{sampleMetadatas.length > 0 ? (
|
182
|
-
<TabPanel
|
183
|
-
id={kSampleMetdataTabId}
|
184
|
-
className={clsx("sample-tab")}
|
185
|
-
title="Metadata"
|
186
|
-
onSelected={onSelectedTab}
|
187
|
-
selected={selectedTab === kSampleMetdataTabId}
|
188
|
-
>
|
168
|
+
<TabPanel
|
169
|
+
key={kSampleScoringTabId}
|
170
|
+
id={kSampleScoringTabId}
|
171
|
+
className="sample-tab"
|
172
|
+
title="Scoring"
|
173
|
+
onSelected={onSelectedTab}
|
174
|
+
selected={selectedTab === kSampleScoringTabId}
|
175
|
+
>
|
176
|
+
<SampleScoresView sample={sample} />
|
177
|
+
</TabPanel>
|
178
|
+
<TabPanel
|
179
|
+
id={kSampleMetdataTabId}
|
180
|
+
className={clsx("sample-tab")}
|
181
|
+
title="Metadata"
|
182
|
+
onSelected={onSelectedTab}
|
183
|
+
selected={selectedTab === kSampleMetdataTabId}
|
184
|
+
>
|
185
|
+
{sampleMetadatas.length > 0 ? (
|
189
186
|
<div className={clsx(styles.metadataPanel)}>{sampleMetadatas}</div>
|
190
|
-
|
191
|
-
|
192
|
-
|
187
|
+
) : (
|
188
|
+
<NoContentsPanel text="No metadata" />
|
189
|
+
)}
|
190
|
+
</TabPanel>
|
191
|
+
{sample?.error ? (
|
193
192
|
<TabPanel
|
194
193
|
id={kSampleErrorTabId}
|
195
194
|
className="sample-tab"
|
@@ -205,14 +204,18 @@ export const SampleDisplay: FC<SampleDisplayProps> = ({
|
|
205
204
|
</div>
|
206
205
|
</TabPanel>
|
207
206
|
) : null}
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
207
|
+
<TabPanel
|
208
|
+
id={kSampleJsonTabId}
|
209
|
+
className={"sample-tab"}
|
210
|
+
title="JSON"
|
211
|
+
onSelected={onSelectedTab}
|
212
|
+
selected={selectedTab === kSampleJsonTabId}
|
213
|
+
>
|
214
|
+
{!sample ? (
|
215
|
+
<NoContentsPanel text="JSON not available" />
|
216
|
+
) : sample.messages.length > 100 ? (
|
217
|
+
<NoContentsPanel text="JSON too large too display" />
|
218
|
+
) : (
|
216
219
|
<div className={clsx(styles.padded, styles.fullWidth)}>
|
217
220
|
<JSONPanel
|
218
221
|
data={sample}
|
@@ -220,14 +223,17 @@ export const SampleDisplay: FC<SampleDisplayProps> = ({
|
|
220
223
|
className={clsx("text-size-small")}
|
221
224
|
/>
|
222
225
|
</div>
|
223
|
-
|
224
|
-
|
226
|
+
)}
|
227
|
+
</TabPanel>
|
225
228
|
</TabSet>
|
226
229
|
</Fragment>
|
227
230
|
);
|
228
231
|
};
|
229
232
|
|
230
|
-
const metadataViewsForSample = (id: string, sample
|
233
|
+
const metadataViewsForSample = (id: string, sample?: EvalSample) => {
|
234
|
+
if (!sample) {
|
235
|
+
return [];
|
236
|
+
}
|
231
237
|
const sampleMetadatas = [];
|
232
238
|
|
233
239
|
if (sample.model_usage && Object.keys(sample.model_usage).length > 0) {
|
@@ -360,3 +366,28 @@ const printSample = (id: string, targetId: string) => {
|
|
360
366
|
}
|
361
367
|
}
|
362
368
|
};
|
369
|
+
|
370
|
+
const isRunning = (
|
371
|
+
sampleSummary?: SampleSummary,
|
372
|
+
runningSampleData?: Events,
|
373
|
+
): boolean => {
|
374
|
+
if (sampleSummary && sampleSummary.completed === false) {
|
375
|
+
// An explicitly incomplete sample summary
|
376
|
+
return true;
|
377
|
+
}
|
378
|
+
|
379
|
+
if (
|
380
|
+
!sampleSummary &&
|
381
|
+
(!runningSampleData || runningSampleData.length === 0)
|
382
|
+
) {
|
383
|
+
// No sample summary yet and no running samples, must've just started
|
384
|
+
return true;
|
385
|
+
}
|
386
|
+
|
387
|
+
if (runningSampleData && runningSampleData.length > 0) {
|
388
|
+
// There are running samples
|
389
|
+
return true;
|
390
|
+
}
|
391
|
+
|
392
|
+
return false;
|
393
|
+
};
|
@@ -19,6 +19,11 @@
|
|
19
19
|
justify-content: center;
|
20
20
|
}
|
21
21
|
|
22
|
+
.centerValue {
|
23
|
+
display: flex;
|
24
|
+
align-items: center;
|
25
|
+
}
|
26
|
+
|
22
27
|
.wrap {
|
23
28
|
word-wrap: anywhere;
|
24
29
|
}
|
@@ -26,3 +31,8 @@
|
|
26
31
|
.titled:hover {
|
27
32
|
cursor: pointer;
|
28
33
|
}
|
34
|
+
|
35
|
+
.value {
|
36
|
+
flex-direction: column;
|
37
|
+
padding-top: 0.1em;
|
38
|
+
}
|