inspect-ai 0.3.80__py3-none-any.whl → 0.3.82__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +35 -2
- inspect_ai/_cli/util.py +44 -1
- inspect_ai/_display/core/config.py +1 -1
- inspect_ai/_display/core/display.py +13 -4
- inspect_ai/_display/core/results.py +1 -1
- inspect_ai/_display/textual/widgets/task_detail.py +5 -4
- inspect_ai/_eval/eval.py +38 -1
- inspect_ai/_eval/evalset.py +5 -0
- inspect_ai/_eval/run.py +5 -2
- inspect_ai/_eval/task/log.py +53 -6
- inspect_ai/_eval/task/run.py +51 -10
- inspect_ai/_util/constants.py +2 -0
- inspect_ai/_util/file.py +17 -1
- inspect_ai/_util/json.py +36 -1
- inspect_ai/_view/server.py +113 -1
- inspect_ai/_view/www/App.css +1 -1
- inspect_ai/_view/www/dist/assets/index.css +518 -296
- inspect_ai/_view/www/dist/assets/index.js +38803 -36307
- inspect_ai/_view/www/eslint.config.mjs +1 -1
- inspect_ai/_view/www/log-schema.json +13 -0
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
- inspect_ai/_view/www/package.json +8 -2
- inspect_ai/_view/www/src/App.tsx +151 -855
- inspect_ai/_view/www/src/api/api-browser.ts +176 -5
- inspect_ai/_view/www/src/api/api-vscode.ts +75 -1
- inspect_ai/_view/www/src/api/client-api.ts +66 -10
- inspect_ai/_view/www/src/api/jsonrpc.ts +2 -0
- inspect_ai/_view/www/src/api/types.ts +107 -2
- inspect_ai/_view/www/src/appearance/icons.ts +1 -0
- inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +3 -3
- inspect_ai/_view/www/src/components/DownloadPanel.tsx +2 -2
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +56 -61
- inspect_ai/_view/www/src/components/FindBand.tsx +17 -9
- inspect_ai/_view/www/src/components/HumanBaselineView.tsx +1 -1
- inspect_ai/_view/www/src/components/JsonPanel.tsx +14 -24
- inspect_ai/_view/www/src/components/LargeModal.tsx +2 -35
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +27 -11
- inspect_ai/_view/www/src/components/LiveVirtualList.module.css +11 -0
- inspect_ai/_view/www/src/components/LiveVirtualList.tsx +177 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +3 -3
- inspect_ai/_view/www/src/components/MessageBand.tsx +14 -9
- inspect_ai/_view/www/src/components/MorePopOver.tsx +3 -3
- inspect_ai/_view/www/src/components/NavPills.tsx +20 -8
- inspect_ai/_view/www/src/components/NoContentsPanel.module.css +12 -0
- inspect_ai/_view/www/src/components/NoContentsPanel.tsx +20 -0
- inspect_ai/_view/www/src/components/ProgressBar.module.css +5 -4
- inspect_ai/_view/www/src/components/ProgressBar.tsx +3 -2
- inspect_ai/_view/www/src/components/PulsingDots.module.css +81 -0
- inspect_ai/_view/www/src/components/PulsingDots.tsx +45 -0
- inspect_ai/_view/www/src/components/TabSet.tsx +4 -37
- inspect_ai/_view/www/src/components/ToolButton.tsx +3 -4
- inspect_ai/_view/www/src/index.tsx +26 -94
- inspect_ai/_view/www/src/logfile/remoteLogFile.ts +9 -1
- inspect_ai/_view/www/src/logfile/remoteZipFile.ts +30 -4
- inspect_ai/_view/www/src/metadata/RenderedContent.tsx +4 -6
- inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +1 -1
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +9 -1
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +67 -28
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +51 -22
- inspect_ai/_view/www/src/samples/SampleDisplay.module.css +4 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +144 -90
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +4 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +82 -35
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +23 -30
- inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +1 -1
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +45 -53
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +4 -1
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +3 -0
- inspect_ai/_view/www/src/samples/chat/messages.ts +34 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +3 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +10 -1
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +22 -46
- inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +25 -17
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +2 -1
- inspect_ai/_view/www/src/samples/descriptor/types.ts +6 -5
- inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +21 -3
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +20 -1
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +105 -85
- inspect_ai/_view/www/src/samples/list/SampleRow.module.css +6 -0
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +27 -14
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +29 -18
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +28 -28
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +19 -9
- inspect_ai/_view/www/src/samples/sampleDataAdapter.ts +33 -0
- inspect_ai/_view/www/src/samples/sampleLimit.ts +2 -2
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +7 -9
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +7 -11
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +4 -0
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +10 -24
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -22
- inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +15 -24
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +6 -28
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +24 -34
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +4 -0
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +8 -13
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +197 -338
- inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.module.css +16 -0
- inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.tsx +44 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +7 -4
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +52 -58
- inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.module.css +23 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.tsx +27 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +30 -1
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +102 -72
- inspect_ai/_view/www/src/scoring/utils.ts +87 -0
- inspect_ai/_view/www/src/state/appSlice.ts +244 -0
- inspect_ai/_view/www/src/state/hooks.ts +397 -0
- inspect_ai/_view/www/src/state/logPolling.ts +196 -0
- inspect_ai/_view/www/src/state/logSlice.ts +214 -0
- inspect_ai/_view/www/src/state/logsPolling.ts +118 -0
- inspect_ai/_view/www/src/state/logsSlice.ts +181 -0
- inspect_ai/_view/www/src/state/samplePolling.ts +311 -0
- inspect_ai/_view/www/src/state/sampleSlice.ts +127 -0
- inspect_ai/_view/www/src/state/sampleUtils.ts +21 -0
- inspect_ai/_view/www/src/state/scrolling.ts +206 -0
- inspect_ai/_view/www/src/state/store.ts +168 -0
- inspect_ai/_view/www/src/state/store_filter.ts +84 -0
- inspect_ai/_view/www/src/state/utils.ts +23 -0
- inspect_ai/_view/www/src/storage/index.ts +26 -0
- inspect_ai/_view/www/src/types/log.d.ts +2 -0
- inspect_ai/_view/www/src/types.ts +94 -32
- inspect_ai/_view/www/src/utils/attachments.ts +58 -23
- inspect_ai/_view/www/src/utils/logger.ts +52 -0
- inspect_ai/_view/www/src/utils/polling.ts +100 -0
- inspect_ai/_view/www/src/utils/react.ts +30 -0
- inspect_ai/_view/www/src/utils/vscode.ts +1 -1
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +181 -216
- inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +11 -53
- inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +8 -18
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -0
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +40 -22
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +0 -1
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +98 -39
- inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.module.css +32 -0
- inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.tsx +32 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +11 -13
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +6 -2
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +4 -4
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +28 -13
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +5 -10
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +4 -4
- inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.module.css +22 -0
- inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.tsx +19 -0
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +110 -115
- inspect_ai/_view/www/src/workspace/tabs/grouping.ts +37 -5
- inspect_ai/_view/www/src/workspace/tabs/types.ts +4 -0
- inspect_ai/_view/www/src/workspace/types.ts +4 -3
- inspect_ai/_view/www/src/workspace/utils.ts +4 -4
- inspect_ai/_view/www/vite.config.js +6 -0
- inspect_ai/_view/www/yarn.lock +370 -354
- inspect_ai/log/_condense.py +26 -0
- inspect_ai/log/_log.py +6 -3
- inspect_ai/log/_recorders/buffer/__init__.py +14 -0
- inspect_ai/log/_recorders/buffer/buffer.py +30 -0
- inspect_ai/log/_recorders/buffer/database.py +685 -0
- inspect_ai/log/_recorders/buffer/filestore.py +259 -0
- inspect_ai/log/_recorders/buffer/types.py +84 -0
- inspect_ai/log/_recorders/eval.py +2 -11
- inspect_ai/log/_recorders/types.py +30 -0
- inspect_ai/log/_transcript.py +27 -1
- inspect_ai/model/_call_tools.py +1 -0
- inspect_ai/model/_generate_config.py +2 -2
- inspect_ai/model/_model.py +1 -0
- inspect_ai/tool/_tool_support_helpers.py +4 -4
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +3 -1
- inspect_ai/util/_subtask.py +1 -0
- {inspect_ai-0.3.80.dist-info → inspect_ai-0.3.82.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.80.dist-info → inspect_ai-0.3.82.dist-info}/RECORD +178 -138
- inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +0 -22
- {inspect_ai-0.3.80.dist-info → inspect_ai-0.3.82.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.80.dist-info → inspect_ai-0.3.82.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.80.dist-info → inspect_ai-0.3.82.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.80.dist-info → inspect_ai-0.3.82.dist-info}/top_level.txt +0 -0
@@ -7,7 +7,7 @@ import { formatNumber } from "../utils/format";
|
|
7
7
|
import { MetaDataView } from "./MetaDataView";
|
8
8
|
|
9
9
|
import clsx from "clsx";
|
10
|
-
import
|
10
|
+
import { FC, Fragment, isValidElement, JSX, ReactNode } from "react";
|
11
11
|
import JSONPanel from "../components/JsonPanel";
|
12
12
|
import { isJson } from "../utils/json";
|
13
13
|
import styles from "./RenderedContent.module.css";
|
@@ -21,7 +21,7 @@ interface RenderedContentProps {
|
|
21
21
|
/**
|
22
22
|
* Renders content based on its type using registered content renderers.
|
23
23
|
*/
|
24
|
-
export const RenderedContent:
|
24
|
+
export const RenderedContent: FC<RenderedContentProps> = ({
|
25
25
|
id,
|
26
26
|
entry,
|
27
27
|
}): JSX.Element => {
|
@@ -43,8 +43,7 @@ export const RenderedContent: React.FC<RenderedContentProps> = ({
|
|
43
43
|
|
44
44
|
if (renderer) {
|
45
45
|
const { rendered } = renderer.render(id, entry);
|
46
|
-
|
47
|
-
if (rendered !== undefined && React.isValidElement(rendered)) {
|
46
|
+
if (rendered !== undefined && isValidElement(rendered)) {
|
48
47
|
return rendered;
|
49
48
|
}
|
50
49
|
}
|
@@ -185,7 +184,7 @@ const contentRenderers: Record<string, ContentRenderer> = {
|
|
185
184
|
return typeof entry.value === "object" && entry.name === "web_search";
|
186
185
|
},
|
187
186
|
render: (_id, entry) => {
|
188
|
-
const results:
|
187
|
+
const results: ReactNode[] = [];
|
189
188
|
results.push(
|
190
189
|
<div className={styles.query}>
|
191
190
|
<i className={ApplicationIcons.search}></i> {entry.value.query}
|
@@ -253,7 +252,6 @@ const contentRenderers: Record<string, ContentRenderer> = {
|
|
253
252
|
return typeof entry.value === "object";
|
254
253
|
},
|
255
254
|
render: (id, entry) => {
|
256
|
-
// Generate a json preview
|
257
255
|
return {
|
258
256
|
rendered: (
|
259
257
|
<MetaDataView
|
@@ -1,21 +1,17 @@
|
|
1
|
-
import { FC,
|
1
|
+
import { FC, useEffect, useRef } from "react";
|
2
2
|
import { ErrorPanel } from "../components/ErrorPanel";
|
3
|
-
import { ProgressBar } from "../components/ProgressBar";
|
4
|
-
import { EvalSample } from "../types/log";
|
5
3
|
import { SampleDisplay } from "./SampleDisplay";
|
6
|
-
import { SamplesDescriptor } from "./descriptor/samplesDescriptor";
|
7
4
|
|
5
|
+
import clsx from "clsx";
|
6
|
+
import { ProgressBar } from "../components/ProgressBar";
|
7
|
+
import { useLogSelection, usePrevious, useSampleData } from "../state/hooks";
|
8
|
+
import { useStore } from "../state/store";
|
8
9
|
import styles from "./InlineSampleDisplay.module.css";
|
9
10
|
|
10
11
|
interface InlineSampleDisplayProps {
|
11
12
|
id: string;
|
12
|
-
sampleStatus: string;
|
13
|
-
sampleError?: Error;
|
14
|
-
sample?: EvalSample;
|
15
|
-
sampleDescriptor: SamplesDescriptor;
|
16
13
|
selectedTab?: string;
|
17
14
|
setSelectedTab: (tab: string) => void;
|
18
|
-
scrollRef: RefObject<HTMLDivElement | null>;
|
19
15
|
}
|
20
16
|
|
21
17
|
/**
|
@@ -23,30 +19,73 @@ interface InlineSampleDisplayProps {
|
|
23
19
|
*/
|
24
20
|
export const InlineSampleDisplay: FC<InlineSampleDisplayProps> = ({
|
25
21
|
id,
|
26
|
-
sample,
|
27
|
-
sampleStatus,
|
28
|
-
sampleError,
|
29
|
-
sampleDescriptor,
|
30
22
|
selectedTab,
|
31
23
|
setSelectedTab,
|
32
|
-
scrollRef,
|
33
24
|
}) => {
|
25
|
+
// Sample hooks
|
26
|
+
const sampleData = useSampleData();
|
27
|
+
const loadSample = useStore((state) => state.sampleActions.loadSample);
|
28
|
+
const logSelection = useLogSelection();
|
29
|
+
|
30
|
+
// Sample Loading
|
31
|
+
const prevCompleted = usePrevious(
|
32
|
+
logSelection.sample?.completed !== undefined
|
33
|
+
? logSelection.sample.completed
|
34
|
+
: true,
|
35
|
+
);
|
36
|
+
const prevLogFile = usePrevious<string | undefined>(logSelection.logFile);
|
37
|
+
useEffect(() => {
|
38
|
+
if (logSelection.logFile && logSelection.sample) {
|
39
|
+
const currentSampleCompleted =
|
40
|
+
logSelection.sample?.completed !== undefined
|
41
|
+
? logSelection.sample.completed
|
42
|
+
: true;
|
43
|
+
|
44
|
+
if (
|
45
|
+
prevLogFile !== logSelection.logFile ||
|
46
|
+
sampleData.sample?.id !== logSelection.sample.id ||
|
47
|
+
sampleData.sample?.epoch !== logSelection.sample.epoch ||
|
48
|
+
currentSampleCompleted !== prevCompleted
|
49
|
+
) {
|
50
|
+
loadSample(logSelection.logFile, logSelection.sample);
|
51
|
+
}
|
52
|
+
}
|
53
|
+
}, [
|
54
|
+
logSelection.logFile,
|
55
|
+
logSelection.sample?.id,
|
56
|
+
logSelection.sample?.epoch,
|
57
|
+
logSelection.sample?.completed,
|
58
|
+
sampleData.sample?.id,
|
59
|
+
sampleData.sample?.epoch,
|
60
|
+
]);
|
61
|
+
|
62
|
+
// Scroll ref
|
63
|
+
const scrollRef = useRef<HTMLDivElement>(null);
|
34
64
|
return (
|
35
65
|
<div className={styles.container}>
|
36
|
-
<ProgressBar
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
66
|
+
<ProgressBar
|
67
|
+
animating={
|
68
|
+
sampleData.status === "loading" || sampleData.status === "streaming"
|
69
|
+
}
|
70
|
+
/>
|
71
|
+
<div className={clsx(styles.scroller)} ref={scrollRef}>
|
72
|
+
<div className={styles.body}>
|
73
|
+
{sampleData.error ? (
|
74
|
+
<ErrorPanel
|
75
|
+
title="Unable to load sample"
|
76
|
+
error={sampleData.error}
|
77
|
+
/>
|
78
|
+
) : (
|
79
|
+
<SampleDisplay
|
80
|
+
id={id}
|
81
|
+
sample={sampleData.sample}
|
82
|
+
runningEvents={sampleData.running}
|
83
|
+
selectedTab={selectedTab}
|
84
|
+
setSelectedTab={setSelectedTab}
|
85
|
+
scrollRef={scrollRef}
|
86
|
+
/>
|
87
|
+
)}
|
88
|
+
</div>
|
50
89
|
</div>
|
51
90
|
</div>
|
52
91
|
);
|
@@ -1,27 +1,22 @@
|
|
1
1
|
import { ApplicationIcons } from "../appearance/icons";
|
2
2
|
import { LargeModal, ModalTool, ModalTools } from "../components/LargeModal";
|
3
3
|
|
4
|
-
import { FC, Ref,
|
4
|
+
import { FC, Ref, useCallback, useEffect, useMemo, useRef } from "react";
|
5
5
|
import { ErrorPanel } from "../components/ErrorPanel";
|
6
|
-
import {
|
6
|
+
import { useLogSelection, usePrevious, useSampleData } from "../state/hooks";
|
7
|
+
import { useStatefulScrollPosition } from "../state/scrolling";
|
8
|
+
import { useStore } from "../state/store";
|
7
9
|
import { SampleDisplay } from "./SampleDisplay";
|
8
|
-
import { SamplesDescriptor } from "./descriptor/samplesDescriptor";
|
9
10
|
|
10
11
|
interface SampleDialogProps {
|
11
12
|
id: string;
|
12
13
|
title: string;
|
13
|
-
sampleStatus: string;
|
14
|
-
sampleError?: Error;
|
15
|
-
sample?: EvalSample;
|
16
|
-
sampleDescriptor: SamplesDescriptor;
|
17
14
|
selectedTab?: string;
|
18
15
|
setSelectedTab: (tab: string) => void;
|
19
16
|
showingSampleDialog: boolean;
|
20
17
|
setShowingSampleDialog: (showing: boolean) => void;
|
21
18
|
nextSample: () => void;
|
22
19
|
prevSample: () => void;
|
23
|
-
sampleScrollPositionRef: RefObject<number>;
|
24
|
-
setSampleScrollPosition: (position: number) => void;
|
25
20
|
}
|
26
21
|
|
27
22
|
/**
|
@@ -30,21 +25,55 @@ interface SampleDialogProps {
|
|
30
25
|
export const SampleDialog: FC<SampleDialogProps> = ({
|
31
26
|
id,
|
32
27
|
title,
|
33
|
-
sample,
|
34
|
-
sampleDescriptor,
|
35
28
|
nextSample,
|
36
29
|
prevSample,
|
37
|
-
sampleStatus,
|
38
|
-
sampleError,
|
39
30
|
showingSampleDialog,
|
40
31
|
setShowingSampleDialog,
|
41
32
|
selectedTab,
|
42
33
|
setSelectedTab,
|
43
|
-
sampleScrollPositionRef,
|
44
|
-
setSampleScrollPosition,
|
45
34
|
}) => {
|
35
|
+
// Scroll referernce (attach stateful trackign)
|
46
36
|
const scrollRef: Ref<HTMLDivElement> = useRef(null);
|
37
|
+
useStatefulScrollPosition(scrollRef, "sample-dialog");
|
47
38
|
|
39
|
+
// Sample hooks
|
40
|
+
const sampleData = useSampleData();
|
41
|
+
const loadSample = useStore((state) => state.sampleActions.loadSample);
|
42
|
+
const logSelection = useLogSelection();
|
43
|
+
|
44
|
+
// Load sample
|
45
|
+
const prevCompleted = usePrevious(
|
46
|
+
logSelection.sample?.completed !== undefined
|
47
|
+
? logSelection.sample.completed
|
48
|
+
: true,
|
49
|
+
);
|
50
|
+
const prevLogFile = usePrevious<string | undefined>(logSelection.logFile);
|
51
|
+
useEffect(() => {
|
52
|
+
if (logSelection.logFile && logSelection.sample) {
|
53
|
+
const currentSampleCompleted =
|
54
|
+
logSelection.sample.completed !== undefined
|
55
|
+
? logSelection.sample.completed
|
56
|
+
: true;
|
57
|
+
|
58
|
+
if (
|
59
|
+
prevLogFile !== logSelection.logFile ||
|
60
|
+
sampleData.sample?.id !== logSelection.sample.id ||
|
61
|
+
sampleData.sample?.epoch !== logSelection.sample.epoch ||
|
62
|
+
currentSampleCompleted !== prevCompleted
|
63
|
+
) {
|
64
|
+
loadSample(logSelection.logFile, logSelection.sample);
|
65
|
+
}
|
66
|
+
}
|
67
|
+
}, [
|
68
|
+
logSelection.logFile,
|
69
|
+
logSelection.sample?.id,
|
70
|
+
logSelection.sample?.epoch,
|
71
|
+
logSelection.sample?.completed,
|
72
|
+
sampleData.sample?.id,
|
73
|
+
sampleData.sample?.epoch,
|
74
|
+
]);
|
75
|
+
|
76
|
+
// Tools
|
48
77
|
const tools = useMemo<ModalTools>(() => {
|
49
78
|
const nextTool: ModalTool = {
|
50
79
|
label: "Next Sample",
|
@@ -100,18 +129,18 @@ export const SampleDialog: FC<SampleDialogProps> = ({
|
|
100
129
|
onkeyup={handleKeyUp}
|
101
130
|
visible={showingSampleDialog}
|
102
131
|
onHide={onHide}
|
103
|
-
showProgress={
|
104
|
-
|
105
|
-
|
132
|
+
showProgress={
|
133
|
+
sampleData.status === "loading" || sampleData.status === "streaming"
|
134
|
+
}
|
106
135
|
scrollRef={scrollRef}
|
107
136
|
>
|
108
|
-
{
|
109
|
-
<ErrorPanel title="Sample Error" error={
|
137
|
+
{sampleData.error ? (
|
138
|
+
<ErrorPanel title="Sample Error" error={sampleData.error} />
|
110
139
|
) : (
|
111
140
|
<SampleDisplay
|
112
141
|
id={id}
|
113
|
-
sample={sample}
|
114
|
-
|
142
|
+
sample={sampleData.sample}
|
143
|
+
runningEvents={sampleData.running}
|
115
144
|
selectedTab={selectedTab}
|
116
145
|
setSelectedTab={setSelectedTab}
|
117
146
|
scrollRef={scrollRef}
|
@@ -10,10 +10,18 @@ import { ToolButton } from "../components/ToolButton";
|
|
10
10
|
import { SampleScoreView } from "./scores/SampleScoreView";
|
11
11
|
|
12
12
|
import clsx from "clsx";
|
13
|
-
import {
|
13
|
+
import {
|
14
|
+
FC,
|
15
|
+
Fragment,
|
16
|
+
MouseEvent,
|
17
|
+
RefObject,
|
18
|
+
useCallback,
|
19
|
+
useMemo,
|
20
|
+
} from "react";
|
21
|
+
import { SampleSummary } from "../api/types";
|
14
22
|
import { Card, CardBody, CardHeader } from "../components/Card";
|
15
|
-
import { EmptyPanel } from "../components/EmptyPanel";
|
16
23
|
import { JSONPanel } from "../components/JsonPanel";
|
24
|
+
import { NoContentsPanel } from "../components/NoContentsPanel";
|
17
25
|
import {
|
18
26
|
kSampleErrorTabId,
|
19
27
|
kSampleJsonTabId,
|
@@ -22,23 +30,25 @@ import {
|
|
22
30
|
kSampleScoringTabId,
|
23
31
|
kSampleTranscriptTabId,
|
24
32
|
} from "../constants";
|
25
|
-
import {
|
33
|
+
import { useSampleSummaries } from "../state/hooks";
|
34
|
+
import { useStore } from "../state/store";
|
35
|
+
import { EvalSample, Events } from "../types/log";
|
26
36
|
import { ModelTokenTable } from "../usage/ModelTokenTable";
|
27
37
|
import { formatTime } from "../utils/format";
|
28
38
|
import { printHeadingHtml, printHtml } from "../utils/print";
|
29
39
|
import { ChatViewVirtualList } from "./chat/ChatViewVirtualList";
|
30
|
-
import {
|
40
|
+
import { messagesFromEvents } from "./chat/messages";
|
31
41
|
import styles from "./SampleDisplay.module.css";
|
32
42
|
import { SampleSummaryView } from "./SampleSummaryView";
|
33
|
-
import {
|
43
|
+
import { TranscriptVirtualList } from "./transcript/TranscriptView";
|
34
44
|
|
35
45
|
interface SampleDisplayProps {
|
36
46
|
id: string;
|
37
47
|
sample?: EvalSample;
|
38
|
-
sampleDescriptor: SamplesDescriptor;
|
39
48
|
selectedTab?: string;
|
40
49
|
setSelectedTab: (tab: string) => void;
|
41
50
|
scrollRef: RefObject<HTMLDivElement | null>;
|
51
|
+
runningEvents?: Events;
|
42
52
|
}
|
43
53
|
|
44
54
|
/**
|
@@ -47,18 +57,32 @@ interface SampleDisplayProps {
|
|
47
57
|
export const SampleDisplay: FC<SampleDisplayProps> = ({
|
48
58
|
id,
|
49
59
|
sample,
|
50
|
-
sampleDescriptor,
|
51
60
|
selectedTab,
|
52
61
|
setSelectedTab,
|
53
62
|
scrollRef,
|
63
|
+
runningEvents: runningSampleData,
|
54
64
|
}) => {
|
55
65
|
// Tab ids
|
56
66
|
const baseId = `sample-dialog`;
|
67
|
+
const sampleSummaries = useSampleSummaries();
|
68
|
+
const selectedSampleIndex = useStore(
|
69
|
+
(state) => state.log.selectedSampleIndex,
|
70
|
+
);
|
57
71
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
72
|
+
const sampleSummary = sampleSummaries[selectedSampleIndex];
|
73
|
+
|
74
|
+
// Consolidate the events and messages into the proper list
|
75
|
+
// whether running or not
|
76
|
+
const sampleEvents = sample?.events || runningSampleData;
|
77
|
+
const sampleMessages = useMemo(() => {
|
78
|
+
if (sample?.messages) {
|
79
|
+
return sample.messages;
|
80
|
+
} else if (runningSampleData) {
|
81
|
+
return messagesFromEvents(runningSampleData);
|
82
|
+
} else {
|
83
|
+
return [];
|
84
|
+
}
|
85
|
+
}, [sample?.messages, runningSampleData]);
|
62
86
|
|
63
87
|
// Tab selection
|
64
88
|
const onSelectedTab = (e: MouseEvent<HTMLElement>) => {
|
@@ -68,12 +92,16 @@ export const SampleDisplay: FC<SampleDisplayProps> = ({
|
|
68
92
|
return false;
|
69
93
|
};
|
70
94
|
|
71
|
-
const scorerNames = Object.keys(sample
|
95
|
+
const scorerNames = Object.keys(sample?.scores || {});
|
72
96
|
const sampleMetadatas = metadataViewsForSample(`${baseId}-${id}`, sample);
|
73
97
|
|
74
98
|
const tabsetId = `task-sample-details-tab-${id}`;
|
75
99
|
const targetId = `${tabsetId}-content`;
|
76
100
|
|
101
|
+
const handlePrintClick = useCallback(() => {
|
102
|
+
printSample(id, targetId);
|
103
|
+
}, [printSample, id, targetId]);
|
104
|
+
|
77
105
|
const tools = [];
|
78
106
|
if (!isVscode()) {
|
79
107
|
tools.push(
|
@@ -81,51 +109,48 @@ export const SampleDisplay: FC<SampleDisplayProps> = ({
|
|
81
109
|
key="sample-print-tool"
|
82
110
|
label="Print"
|
83
111
|
icon={ApplicationIcons.copy}
|
84
|
-
onClick={
|
85
|
-
printSample(id, targetId);
|
86
|
-
}}
|
112
|
+
onClick={handlePrintClick}
|
87
113
|
/>,
|
88
114
|
);
|
89
115
|
}
|
90
116
|
|
117
|
+
// Is the sample running?
|
118
|
+
const running = isRunning(sampleSummary, runningSampleData);
|
119
|
+
|
91
120
|
return (
|
92
121
|
<Fragment>
|
93
|
-
|
94
|
-
parent_id={id}
|
95
|
-
|
96
|
-
sampleDescriptor={sampleDescriptor}
|
97
|
-
/>
|
122
|
+
{sample || sampleSummary ? (
|
123
|
+
<SampleSummaryView parent_id={id} sample={sample || sampleSummary} />
|
124
|
+
) : undefined}
|
98
125
|
<TabSet
|
99
126
|
id={tabsetId}
|
100
127
|
tabControlsClassName={clsx("text-size-base")}
|
101
128
|
tabPanelsClassName={clsx(styles.tabPanel)}
|
102
129
|
tools={tools}
|
103
130
|
>
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
</TabPanel>
|
124
|
-
) : null}
|
131
|
+
<TabPanel
|
132
|
+
key={kSampleTranscriptTabId}
|
133
|
+
id={kSampleTranscriptTabId}
|
134
|
+
className="sample-tab"
|
135
|
+
title="Transcript"
|
136
|
+
onSelected={onSelectedTab}
|
137
|
+
selected={
|
138
|
+
selectedTab === kSampleTranscriptTabId || selectedTab === undefined
|
139
|
+
}
|
140
|
+
scrollable={false}
|
141
|
+
>
|
142
|
+
<TranscriptVirtualList
|
143
|
+
key={`${baseId}-transcript-display-${id}`}
|
144
|
+
id={`${baseId}-transcript-display-${id}`}
|
145
|
+
events={sampleEvents || []}
|
146
|
+
running={running}
|
147
|
+
scrollRef={scrollRef}
|
148
|
+
/>
|
149
|
+
</TabPanel>
|
125
150
|
<TabPanel
|
126
151
|
key={kSampleMessagesTabId}
|
127
152
|
id={kSampleMessagesTabId}
|
128
|
-
className={clsx("sample-tab", styles.fullWidth)}
|
153
|
+
className={clsx("sample-tab", styles.fullWidth, styles.chat)}
|
129
154
|
title="Messages"
|
130
155
|
onSelected={onSelectedTab}
|
131
156
|
selected={selectedTab === kSampleMessagesTabId}
|
@@ -134,13 +159,14 @@ export const SampleDisplay: FC<SampleDisplayProps> = ({
|
|
134
159
|
<ChatViewVirtualList
|
135
160
|
key={`${baseId}-chat-${id}`}
|
136
161
|
id={`${baseId}-chat-${id}`}
|
137
|
-
messages={
|
162
|
+
messages={sampleMessages}
|
138
163
|
indented={true}
|
139
164
|
scrollRef={scrollRef}
|
140
165
|
toolCallStyle="complete"
|
166
|
+
running={running}
|
141
167
|
/>
|
142
168
|
</TabPanel>
|
143
|
-
{scorerNames.length === 1 ? (
|
169
|
+
{sample && scorerNames.length === 1 ? (
|
144
170
|
<TabPanel
|
145
171
|
key={kSampleScoringTabId}
|
146
172
|
id={kSampleScoringTabId}
|
@@ -149,47 +175,43 @@ export const SampleDisplay: FC<SampleDisplayProps> = ({
|
|
149
175
|
onSelected={onSelectedTab}
|
150
176
|
selected={selectedTab === kSampleScoringTabId}
|
151
177
|
>
|
152
|
-
<SampleScoreView
|
153
|
-
sample={sample}
|
154
|
-
sampleDescriptor={sampleDescriptor}
|
155
|
-
scorer={scorerNames[0]}
|
156
|
-
/>
|
178
|
+
<SampleScoreView sample={sample} scorer={scorerNames[0]} />
|
157
179
|
</TabPanel>
|
158
180
|
) : (
|
159
181
|
<>
|
160
|
-
{
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
);
|
178
|
-
})}
|
182
|
+
{sample
|
183
|
+
? Object.keys(sample?.scores || {}).map((scorer) => {
|
184
|
+
const tabId = `score-${scorer}`;
|
185
|
+
return (
|
186
|
+
<TabPanel
|
187
|
+
key={tabId}
|
188
|
+
id={tabId}
|
189
|
+
className="sample-tab"
|
190
|
+
title={scorer}
|
191
|
+
onSelected={onSelectedTab}
|
192
|
+
selected={selectedTab === tabId}
|
193
|
+
>
|
194
|
+
<SampleScoreView sample={sample} scorer={scorer} />
|
195
|
+
</TabPanel>
|
196
|
+
);
|
197
|
+
})
|
198
|
+
: undefined}
|
179
199
|
</>
|
180
200
|
)}
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
>
|
201
|
+
<TabPanel
|
202
|
+
id={kSampleMetdataTabId}
|
203
|
+
className={clsx("sample-tab")}
|
204
|
+
title="Metadata"
|
205
|
+
onSelected={onSelectedTab}
|
206
|
+
selected={selectedTab === kSampleMetdataTabId}
|
207
|
+
>
|
208
|
+
{sampleMetadatas.length > 0 ? (
|
189
209
|
<div className={clsx(styles.metadataPanel)}>{sampleMetadatas}</div>
|
190
|
-
|
191
|
-
|
192
|
-
|
210
|
+
) : (
|
211
|
+
<NoContentsPanel text="No metadata" />
|
212
|
+
)}
|
213
|
+
</TabPanel>
|
214
|
+
{sample?.error ? (
|
193
215
|
<TabPanel
|
194
216
|
id={kSampleErrorTabId}
|
195
217
|
className="sample-tab"
|
@@ -205,14 +227,18 @@ export const SampleDisplay: FC<SampleDisplayProps> = ({
|
|
205
227
|
</div>
|
206
228
|
</TabPanel>
|
207
229
|
) : null}
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
230
|
+
<TabPanel
|
231
|
+
id={kSampleJsonTabId}
|
232
|
+
className={"sample-tab"}
|
233
|
+
title="JSON"
|
234
|
+
onSelected={onSelectedTab}
|
235
|
+
selected={selectedTab === kSampleJsonTabId}
|
236
|
+
>
|
237
|
+
{!sample ? (
|
238
|
+
<NoContentsPanel text="JSON not available" />
|
239
|
+
) : sample.messages.length > 100 ? (
|
240
|
+
<NoContentsPanel text="JSON too large too display" />
|
241
|
+
) : (
|
216
242
|
<div className={clsx(styles.padded, styles.fullWidth)}>
|
217
243
|
<JSONPanel
|
218
244
|
data={sample}
|
@@ -220,14 +246,17 @@ export const SampleDisplay: FC<SampleDisplayProps> = ({
|
|
220
246
|
className={clsx("text-size-small")}
|
221
247
|
/>
|
222
248
|
</div>
|
223
|
-
|
224
|
-
|
249
|
+
)}
|
250
|
+
</TabPanel>
|
225
251
|
</TabSet>
|
226
252
|
</Fragment>
|
227
253
|
);
|
228
254
|
};
|
229
255
|
|
230
|
-
const metadataViewsForSample = (id: string, sample
|
256
|
+
const metadataViewsForSample = (id: string, sample?: EvalSample) => {
|
257
|
+
if (!sample) {
|
258
|
+
return [];
|
259
|
+
}
|
231
260
|
const sampleMetadatas = [];
|
232
261
|
|
233
262
|
if (sample.model_usage && Object.keys(sample.model_usage).length > 0) {
|
@@ -360,3 +389,28 @@ const printSample = (id: string, targetId: string) => {
|
|
360
389
|
}
|
361
390
|
}
|
362
391
|
};
|
392
|
+
|
393
|
+
const isRunning = (
|
394
|
+
sampleSummary?: SampleSummary,
|
395
|
+
runningSampleData?: Events,
|
396
|
+
): boolean => {
|
397
|
+
if (sampleSummary && sampleSummary.completed === false) {
|
398
|
+
// An explicitly incomplete sample summary
|
399
|
+
return true;
|
400
|
+
}
|
401
|
+
|
402
|
+
if (
|
403
|
+
!sampleSummary &&
|
404
|
+
(!runningSampleData || runningSampleData.length === 0)
|
405
|
+
) {
|
406
|
+
// No sample summary yet and no running samples, must've just started
|
407
|
+
return true;
|
408
|
+
}
|
409
|
+
|
410
|
+
if (runningSampleData && runningSampleData.length > 0) {
|
411
|
+
// There are running samples
|
412
|
+
return true;
|
413
|
+
}
|
414
|
+
|
415
|
+
return false;
|
416
|
+
};
|