inspect-ai 0.3.81__py3-none-any.whl → 0.3.83__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/__init__.py +2 -1
- inspect_ai/_cli/eval.py +35 -2
- inspect_ai/_cli/util.py +44 -1
- inspect_ai/_display/core/config.py +1 -1
- inspect_ai/_display/core/display.py +13 -4
- inspect_ai/_display/core/results.py +1 -1
- inspect_ai/_display/textual/app.py +14 -3
- inspect_ai/_display/textual/display.py +4 -0
- inspect_ai/_display/textual/widgets/samples.py +9 -3
- inspect_ai/_display/textual/widgets/task_detail.py +8 -8
- inspect_ai/_display/textual/widgets/tasks.py +17 -1
- inspect_ai/_display/textual/widgets/vscode.py +44 -0
- inspect_ai/_eval/eval.py +74 -25
- inspect_ai/_eval/evalset.py +22 -18
- inspect_ai/_eval/loader.py +34 -11
- inspect_ai/_eval/run.py +13 -15
- inspect_ai/_eval/score.py +13 -3
- inspect_ai/_eval/task/generate.py +8 -9
- inspect_ai/_eval/task/log.py +55 -6
- inspect_ai/_eval/task/run.py +51 -10
- inspect_ai/_eval/task/task.py +23 -9
- inspect_ai/_util/constants.py +2 -0
- inspect_ai/_util/file.py +30 -1
- inspect_ai/_util/json.py +37 -1
- inspect_ai/_util/registry.py +1 -0
- inspect_ai/_util/vscode.py +37 -0
- inspect_ai/_view/server.py +113 -1
- inspect_ai/_view/www/App.css +7 -1
- inspect_ai/_view/www/dist/assets/index.css +813 -415
- inspect_ai/_view/www/dist/assets/index.js +54475 -32003
- inspect_ai/_view/www/eslint.config.mjs +1 -1
- inspect_ai/_view/www/log-schema.json +137 -31
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
- inspect_ai/_view/www/package.json +11 -2
- inspect_ai/_view/www/src/App.tsx +161 -853
- inspect_ai/_view/www/src/api/api-browser.ts +176 -5
- inspect_ai/_view/www/src/api/api-vscode.ts +75 -1
- inspect_ai/_view/www/src/api/client-api.ts +66 -10
- inspect_ai/_view/www/src/api/jsonrpc.ts +2 -0
- inspect_ai/_view/www/src/api/types.ts +107 -2
- inspect_ai/_view/www/src/appearance/icons.ts +2 -0
- inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +3 -3
- inspect_ai/_view/www/src/components/Card.tsx +6 -4
- inspect_ai/_view/www/src/components/DownloadPanel.tsx +2 -2
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +56 -61
- inspect_ai/_view/www/src/components/FindBand.tsx +17 -9
- inspect_ai/_view/www/src/components/HumanBaselineView.tsx +1 -1
- inspect_ai/_view/www/src/components/JsonPanel.tsx +14 -24
- inspect_ai/_view/www/src/components/LargeModal.tsx +2 -35
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +27 -11
- inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
- inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
- inspect_ai/_view/www/src/components/LiveVirtualList.module.css +11 -0
- inspect_ai/_view/www/src/components/LiveVirtualList.tsx +177 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +116 -26
- inspect_ai/_view/www/src/components/MessageBand.tsx +14 -9
- inspect_ai/_view/www/src/components/Modal.module.css +38 -0
- inspect_ai/_view/www/src/components/Modal.tsx +77 -0
- inspect_ai/_view/www/src/components/MorePopOver.tsx +3 -3
- inspect_ai/_view/www/src/components/NavPills.tsx +20 -8
- inspect_ai/_view/www/src/components/NoContentsPanel.module.css +12 -0
- inspect_ai/_view/www/src/components/NoContentsPanel.tsx +20 -0
- inspect_ai/_view/www/src/components/ProgressBar.module.css +5 -4
- inspect_ai/_view/www/src/components/ProgressBar.tsx +3 -2
- inspect_ai/_view/www/src/components/PulsingDots.module.css +81 -0
- inspect_ai/_view/www/src/components/PulsingDots.tsx +45 -0
- inspect_ai/_view/www/src/components/TabSet.tsx +4 -37
- inspect_ai/_view/www/src/components/ToolButton.tsx +3 -4
- inspect_ai/_view/www/src/index.tsx +26 -94
- inspect_ai/_view/www/src/logfile/remoteLogFile.ts +9 -1
- inspect_ai/_view/www/src/logfile/remoteZipFile.ts +30 -4
- inspect_ai/_view/www/src/metadata/RenderedContent.tsx +4 -6
- inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
- inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
- inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +1 -1
- inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +9 -1
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +74 -28
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +58 -22
- inspect_ai/_view/www/src/samples/SampleDisplay.module.css +4 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +135 -104
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +10 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +83 -36
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +35 -30
- inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +1 -1
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +45 -53
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +6 -1
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +5 -0
- inspect_ai/_view/www/src/samples/chat/messages.ts +36 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +3 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +11 -1
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +22 -46
- inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +34 -20
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -10
- inspect_ai/_view/www/src/samples/descriptor/types.ts +6 -5
- inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +22 -3
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +27 -2
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +122 -85
- inspect_ai/_view/www/src/samples/list/SampleRow.module.css +6 -0
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +28 -15
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +29 -18
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +28 -28
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +19 -9
- inspect_ai/_view/www/src/samples/sampleDataAdapter.ts +33 -0
- inspect_ai/_view/www/src/samples/sampleLimit.ts +2 -2
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +12 -27
- inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
- inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
- inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
- inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +4 -0
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +10 -24
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -22
- inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +15 -24
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +6 -28
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +24 -34
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +4 -0
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +33 -17
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +197 -338
- inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.module.css +16 -0
- inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.tsx +44 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +7 -4
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +81 -60
- inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.module.css +23 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.tsx +27 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +29 -1
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +102 -72
- inspect_ai/_view/www/src/scoring/utils.ts +87 -0
- inspect_ai/_view/www/src/state/appSlice.ts +244 -0
- inspect_ai/_view/www/src/state/hooks.ts +399 -0
- inspect_ai/_view/www/src/state/logPolling.ts +200 -0
- inspect_ai/_view/www/src/state/logSlice.ts +224 -0
- inspect_ai/_view/www/src/state/logsPolling.ts +118 -0
- inspect_ai/_view/www/src/state/logsSlice.ts +181 -0
- inspect_ai/_view/www/src/state/samplePolling.ts +314 -0
- inspect_ai/_view/www/src/state/sampleSlice.ts +140 -0
- inspect_ai/_view/www/src/state/sampleUtils.ts +21 -0
- inspect_ai/_view/www/src/state/scrolling.ts +206 -0
- inspect_ai/_view/www/src/state/store.ts +168 -0
- inspect_ai/_view/www/src/state/store_filter.ts +84 -0
- inspect_ai/_view/www/src/state/utils.ts +23 -0
- inspect_ai/_view/www/src/storage/index.ts +26 -0
- inspect_ai/_view/www/src/types/log.d.ts +36 -26
- inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
- inspect_ai/_view/www/src/types.ts +94 -32
- inspect_ai/_view/www/src/utils/attachments.ts +58 -23
- inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
- inspect_ai/_view/www/src/utils/logger.ts +52 -0
- inspect_ai/_view/www/src/utils/polling.ts +100 -0
- inspect_ai/_view/www/src/utils/react.ts +30 -0
- inspect_ai/_view/www/src/utils/vscode.ts +1 -1
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +184 -217
- inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +11 -53
- inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +8 -18
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -0
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +40 -22
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -1
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +159 -103
- inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.module.css +32 -0
- inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.tsx +32 -0
- inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
- inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +12 -14
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +6 -2
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +4 -4
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +28 -13
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +5 -10
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +4 -4
- inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.module.css +22 -0
- inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.tsx +19 -0
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +128 -115
- inspect_ai/_view/www/src/workspace/tabs/grouping.ts +37 -5
- inspect_ai/_view/www/src/workspace/tabs/types.ts +4 -0
- inspect_ai/_view/www/src/workspace/types.ts +4 -3
- inspect_ai/_view/www/src/workspace/utils.ts +4 -4
- inspect_ai/_view/www/vite.config.js +6 -0
- inspect_ai/_view/www/yarn.lock +464 -355
- inspect_ai/agent/__init__.py +36 -0
- inspect_ai/agent/_agent.py +268 -0
- inspect_ai/agent/_as_solver.py +72 -0
- inspect_ai/agent/_as_tool.py +122 -0
- inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
- inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
- inspect_ai/agent/_filter.py +46 -0
- inspect_ai/agent/_handoff.py +93 -0
- inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
- inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
- inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
- inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
- inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
- inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
- inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
- inspect_ai/agent/_react.py +241 -0
- inspect_ai/agent/_run.py +36 -0
- inspect_ai/agent/_types.py +81 -0
- inspect_ai/log/_condense.py +26 -0
- inspect_ai/log/_log.py +17 -5
- inspect_ai/log/_recorders/buffer/__init__.py +14 -0
- inspect_ai/log/_recorders/buffer/buffer.py +30 -0
- inspect_ai/log/_recorders/buffer/database.py +685 -0
- inspect_ai/log/_recorders/buffer/filestore.py +259 -0
- inspect_ai/log/_recorders/buffer/types.py +84 -0
- inspect_ai/log/_recorders/eval.py +2 -11
- inspect_ai/log/_recorders/types.py +30 -0
- inspect_ai/log/_transcript.py +32 -2
- inspect_ai/model/__init__.py +7 -1
- inspect_ai/model/_call_tools.py +257 -52
- inspect_ai/model/_chat_message.py +7 -4
- inspect_ai/model/_conversation.py +13 -62
- inspect_ai/model/_display.py +85 -0
- inspect_ai/model/_generate_config.py +2 -2
- inspect_ai/model/_model.py +114 -14
- inspect_ai/model/_model_output.py +14 -9
- inspect_ai/model/_openai.py +16 -4
- inspect_ai/model/_openai_computer_use.py +162 -0
- inspect_ai/model/_openai_responses.py +319 -165
- inspect_ai/model/_providers/anthropic.py +20 -21
- inspect_ai/model/_providers/azureai.py +24 -13
- inspect_ai/model/_providers/bedrock.py +1 -7
- inspect_ai/model/_providers/cloudflare.py +3 -3
- inspect_ai/model/_providers/goodfire.py +2 -6
- inspect_ai/model/_providers/google.py +11 -10
- inspect_ai/model/_providers/groq.py +6 -3
- inspect_ai/model/_providers/hf.py +7 -3
- inspect_ai/model/_providers/mistral.py +7 -10
- inspect_ai/model/_providers/openai.py +47 -17
- inspect_ai/model/_providers/openai_o1.py +11 -4
- inspect_ai/model/_providers/openai_responses.py +12 -14
- inspect_ai/model/_providers/providers.py +2 -2
- inspect_ai/model/_providers/together.py +12 -2
- inspect_ai/model/_providers/util/chatapi.py +7 -2
- inspect_ai/model/_providers/util/hf_handler.py +4 -2
- inspect_ai/model/_providers/util/llama31.py +4 -2
- inspect_ai/model/_providers/vertex.py +11 -9
- inspect_ai/model/_providers/vllm.py +4 -4
- inspect_ai/scorer/__init__.py +2 -0
- inspect_ai/scorer/_metrics/__init__.py +2 -0
- inspect_ai/scorer/_metrics/grouped.py +84 -0
- inspect_ai/scorer/_score.py +26 -6
- inspect_ai/solver/__init__.py +2 -2
- inspect_ai/solver/_basic_agent.py +22 -9
- inspect_ai/solver/_bridge.py +31 -0
- inspect_ai/solver/_chain.py +20 -12
- inspect_ai/solver/_fork.py +5 -1
- inspect_ai/solver/_human_agent.py +52 -0
- inspect_ai/solver/_prompt.py +3 -1
- inspect_ai/solver/_run.py +59 -0
- inspect_ai/solver/_solver.py +14 -4
- inspect_ai/solver/_task_state.py +5 -3
- inspect_ai/tool/_tool_call.py +15 -8
- inspect_ai/tool/_tool_def.py +17 -12
- inspect_ai/tool/_tool_support_helpers.py +4 -4
- inspect_ai/tool/_tool_with.py +14 -11
- inspect_ai/tool/_tools/_bash_session.py +11 -2
- inspect_ai/tool/_tools/_computer/_common.py +18 -2
- inspect_ai/tool/_tools/_computer/_computer.py +18 -2
- inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
- inspect_ai/tool/_tools/_think.py +1 -1
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +103 -62
- inspect_ai/util/__init__.py +2 -0
- inspect_ai/util/_anyio.py +27 -0
- inspect_ai/util/_sandbox/__init__.py +2 -1
- inspect_ai/util/_sandbox/context.py +32 -7
- inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
- inspect_ai/util/_sandbox/docker/compose.py +2 -2
- inspect_ai/util/_sandbox/docker/docker.py +12 -1
- inspect_ai/util/_store_model.py +30 -7
- inspect_ai/util/_subprocess.py +13 -3
- inspect_ai/util/_subtask.py +1 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/RECORD +295 -229
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -169
- inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +0 -22
- /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/top_level.txt +0 -0
@@ -1,135 +1,153 @@
|
|
1
1
|
import {
|
2
2
|
FC,
|
3
3
|
Fragment,
|
4
|
-
RefObject,
|
5
4
|
useCallback,
|
6
5
|
useEffect,
|
6
|
+
useMemo,
|
7
7
|
useRef,
|
8
8
|
useState,
|
9
9
|
} from "react";
|
10
10
|
import { VirtuosoHandle } from "react-virtuoso";
|
11
|
-
import {
|
12
|
-
import {
|
13
|
-
import {
|
14
|
-
import {
|
15
|
-
import {
|
16
|
-
|
17
|
-
|
18
|
-
|
11
|
+
import { NoContentsPanel } from "../../components/NoContentsPanel.tsx";
|
12
|
+
import { InlineSampleDisplay } from "../../samples/InlineSampleDisplay.tsx";
|
13
|
+
import { SampleDialog } from "../../samples/SampleDialog.tsx";
|
14
|
+
import { SampleList } from "../../samples/list/SampleList.tsx";
|
15
|
+
import {
|
16
|
+
useFilteredSamples,
|
17
|
+
useGroupBy,
|
18
|
+
useGroupByOrder,
|
19
|
+
useSampleDescriptor,
|
20
|
+
useScore,
|
21
|
+
useTotalSampleCount,
|
22
|
+
} from "../../state/hooks.ts";
|
23
|
+
import { useStore } from "../../state/store.ts";
|
24
|
+
import { RunningNoSamples } from "./RunningNoSamples.tsx";
|
19
25
|
import { getSampleProcessor } from "./grouping.ts";
|
20
26
|
import { ListItem } from "./types.ts";
|
21
27
|
|
22
28
|
interface SamplesTabProps {
|
23
|
-
// Optional props
|
24
|
-
sample?: EvalSample;
|
25
|
-
samples?: SampleSummary[];
|
26
|
-
sampleDescriptor?: SamplesDescriptor;
|
27
|
-
sampleError?: Error;
|
28
|
-
|
29
29
|
// Required props
|
30
|
-
|
31
|
-
groupBy: "epoch" | "sample" | "none";
|
32
|
-
groupByOrder: "asc" | "desc";
|
33
|
-
sampleStatus: string;
|
34
|
-
selectedSampleIndex: number;
|
35
|
-
setSelectedSampleIndex: (index: number) => void;
|
36
|
-
showingSampleDialog: boolean;
|
37
|
-
setShowingSampleDialog: (showing: boolean) => void;
|
38
|
-
selectedSampleTab?: string;
|
39
|
-
setSelectedSampleTab: (tab: string) => void;
|
40
|
-
epoch: string;
|
41
|
-
filter: ScoreFilter;
|
42
|
-
sampleScrollPositionRef: RefObject<number>;
|
43
|
-
setSampleScrollPosition: (position: number) => void;
|
44
|
-
sampleTabScrollRef: RefObject<HTMLDivElement | null>;
|
30
|
+
running: boolean;
|
45
31
|
}
|
46
32
|
|
47
|
-
export const SamplesTab: FC<SamplesTabProps> = ({
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
33
|
+
export const SamplesTab: FC<SamplesTabProps> = ({ running }) => {
|
34
|
+
const selectSample = useStore((state) => state.logActions.selectSample);
|
35
|
+
const selectedSampleIndex = useStore(
|
36
|
+
(state) => state.log.selectedSampleIndex,
|
37
|
+
);
|
38
|
+
|
39
|
+
const sampleSummaries = useFilteredSamples();
|
40
|
+
const selectedLogSummary = useStore((state) => state.log.selectedLogSummary);
|
41
|
+
|
42
|
+
// Compute the limit to apply to the sample count (this is so)
|
43
|
+
// we can provide a total expected sample count for this evaluation
|
44
|
+
const evalSampleCount = useMemo(() => {
|
45
|
+
const limit = selectedLogSummary?.eval.config.limit;
|
46
|
+
const limitCount =
|
47
|
+
limit === null || limit === undefined
|
48
|
+
? undefined
|
49
|
+
: typeof limit === "number"
|
50
|
+
? limit
|
51
|
+
: (limit[1] as number) - (limit[0] as number);
|
52
|
+
return (
|
53
|
+
(limitCount || selectedLogSummary?.eval.dataset.samples || 0) *
|
54
|
+
(selectedLogSummary?.eval.config.epochs || 0)
|
55
|
+
);
|
56
|
+
}, [selectedLogSummary?.eval.config.limit]);
|
57
|
+
|
58
|
+
const totalSampleCount = useTotalSampleCount();
|
59
|
+
|
60
|
+
const samplesDescriptor = useSampleDescriptor();
|
61
|
+
const groupBy = useGroupBy();
|
62
|
+
const groupByOrder = useGroupByOrder();
|
63
|
+
const currentScore = useScore();
|
64
|
+
|
65
|
+
const selectedSample = useStore((state) => state.sample.selectedSample);
|
66
|
+
|
66
67
|
const [items, setItems] = useState<ListItem[]>([]);
|
67
68
|
const [sampleItems, setSampleItems] = useState<ListItem[]>([]);
|
68
69
|
|
69
70
|
const sampleListHandle = useRef<VirtuosoHandle | null>(null);
|
70
71
|
const sampleDialogRef = useRef<HTMLDivElement>(null);
|
71
72
|
|
73
|
+
const selectedSampleTab = useStore((state) => state.app.tabs.sample);
|
74
|
+
const setSelectedSampleTab = useStore(
|
75
|
+
(state) => state.appActions.setSampleTab,
|
76
|
+
);
|
77
|
+
const showingSampleDialog = useStore((state) => state.app.dialogs.sample);
|
78
|
+
const setShowingSampleDialog = useStore(
|
79
|
+
(state) => state.appActions.setShowingSampleDialog,
|
80
|
+
);
|
81
|
+
|
72
82
|
// Shows the sample dialog
|
73
83
|
const showSample = useCallback(
|
74
84
|
(index: number) => {
|
75
|
-
|
85
|
+
selectSample(index);
|
76
86
|
setShowingSampleDialog(true);
|
77
87
|
},
|
78
|
-
[
|
88
|
+
[selectSample, setShowingSampleDialog],
|
79
89
|
);
|
80
90
|
|
91
|
+
// Keep the selected item scrolled into view
|
92
|
+
useEffect(() => {
|
93
|
+
setTimeout(() => {
|
94
|
+
if (sampleListHandle.current) {
|
95
|
+
sampleListHandle.current.scrollIntoView({ index: selectedSampleIndex });
|
96
|
+
}
|
97
|
+
}, 0);
|
98
|
+
}, [selectedSampleIndex]);
|
99
|
+
|
100
|
+
// Focus the dialog when it is shown
|
81
101
|
useEffect(() => {
|
82
102
|
if (showingSampleDialog) {
|
83
103
|
setTimeout(() => {
|
84
104
|
sampleDialogRef.current?.focus();
|
85
105
|
}, 0);
|
86
|
-
} else {
|
87
|
-
setTimeout(() => {
|
88
|
-
if (sampleListHandle.current) {
|
89
|
-
sampleListHandle.current.scrollToIndex(0);
|
90
|
-
}
|
91
|
-
}, 0);
|
92
106
|
}
|
93
107
|
}, [showingSampleDialog]);
|
94
108
|
|
109
|
+
const sampleProcessor = useMemo(() => {
|
110
|
+
if (!samplesDescriptor) return undefined;
|
111
|
+
|
112
|
+
return getSampleProcessor(
|
113
|
+
sampleSummaries || [],
|
114
|
+
selectedLogSummary?.eval?.config?.epochs || 1,
|
115
|
+
groupBy,
|
116
|
+
groupByOrder,
|
117
|
+
samplesDescriptor,
|
118
|
+
currentScore,
|
119
|
+
);
|
120
|
+
}, [
|
121
|
+
samplesDescriptor,
|
122
|
+
sampleSummaries,
|
123
|
+
selectedLogSummary?.eval?.config?.epochs,
|
124
|
+
groupBy,
|
125
|
+
groupByOrder,
|
126
|
+
currentScore,
|
127
|
+
]);
|
128
|
+
|
95
129
|
useEffect(() => {
|
96
|
-
const
|
97
|
-
? getSampleProcessor(
|
98
|
-
samples || [],
|
99
|
-
groupBy,
|
100
|
-
groupByOrder,
|
101
|
-
sampleDescriptor,
|
102
|
-
)
|
103
|
-
: undefined;
|
104
|
-
|
105
|
-
// Process the samples into the proper data structure
|
106
|
-
const items = samples?.flatMap((sample, index) => {
|
130
|
+
const resolvedSamples = sampleSummaries?.flatMap((sample, index) => {
|
107
131
|
const results: ListItem[] = [];
|
108
|
-
const previousSample =
|
132
|
+
const previousSample =
|
133
|
+
index !== 0 ? sampleSummaries[index - 1] : undefined;
|
109
134
|
const items = sampleProcessor
|
110
135
|
? sampleProcessor(sample, index, previousSample)
|
111
136
|
: [];
|
137
|
+
|
112
138
|
results.push(...items);
|
113
139
|
return results;
|
114
140
|
});
|
115
141
|
|
116
|
-
setItems(
|
142
|
+
setItems(resolvedSamples || []);
|
117
143
|
setSampleItems(
|
118
|
-
|
119
|
-
?
|
144
|
+
resolvedSamples
|
145
|
+
? resolvedSamples.filter((item) => {
|
120
146
|
return item.type === "sample";
|
121
147
|
})
|
122
148
|
: [],
|
123
149
|
);
|
124
|
-
}, [
|
125
|
-
|
126
|
-
const nextSampleIndex = useCallback(() => {
|
127
|
-
if (selectedSampleIndex < sampleItems.length - 1) {
|
128
|
-
return selectedSampleIndex + 1;
|
129
|
-
} else {
|
130
|
-
return -1;
|
131
|
-
}
|
132
|
-
}, [selectedSampleIndex, sampleItems.length]);
|
150
|
+
}, [sampleSummaries, sampleProcessor]);
|
133
151
|
|
134
152
|
const previousSampleIndex = useCallback(() => {
|
135
153
|
return selectedSampleIndex > 0 ? selectedSampleIndex - 1 : -1;
|
@@ -137,68 +155,63 @@ export const SamplesTab: FC<SamplesTabProps> = ({
|
|
137
155
|
|
138
156
|
// Manage the next / previous state the selected sample
|
139
157
|
const nextSample = useCallback(() => {
|
140
|
-
const next =
|
141
|
-
if (
|
142
|
-
|
158
|
+
const next = Math.min(selectedSampleIndex + 1, sampleItems.length - 1);
|
159
|
+
if (next > -1) {
|
160
|
+
selectSample(next);
|
143
161
|
}
|
144
|
-
}, [
|
162
|
+
}, [selectedSampleIndex, sampleItems, selectSample]);
|
145
163
|
|
146
164
|
const previousSample = useCallback(() => {
|
147
165
|
const prev = previousSampleIndex();
|
148
|
-
if (
|
149
|
-
|
166
|
+
if (prev > -1) {
|
167
|
+
selectSample(prev);
|
150
168
|
}
|
151
|
-
}, [previousSampleIndex,
|
169
|
+
}, [previousSampleIndex, selectSample]);
|
152
170
|
|
153
171
|
const title =
|
154
172
|
selectedSampleIndex > -1 && sampleItems.length > selectedSampleIndex
|
155
173
|
? sampleItems[selectedSampleIndex].label
|
156
174
|
: "";
|
157
175
|
|
158
|
-
if (
|
159
|
-
|
176
|
+
if (totalSampleCount === 0) {
|
177
|
+
if (running) {
|
178
|
+
return <RunningNoSamples />;
|
179
|
+
} else {
|
180
|
+
return <NoContentsPanel text="No samples" />;
|
181
|
+
}
|
160
182
|
} else {
|
161
183
|
return (
|
162
184
|
<Fragment>
|
163
|
-
{
|
185
|
+
{samplesDescriptor && totalSampleCount === 1 ? (
|
164
186
|
<InlineSampleDisplay
|
165
187
|
id="sample-display"
|
166
|
-
sample={sample}
|
167
|
-
sampleStatus={sampleStatus}
|
168
|
-
sampleError={sampleError}
|
169
|
-
sampleDescriptor={sampleDescriptor}
|
170
188
|
selectedTab={selectedSampleTab}
|
171
189
|
setSelectedTab={setSelectedSampleTab}
|
172
|
-
scrollRef={sampleTabScrollRef}
|
173
190
|
/>
|
174
191
|
) : undefined}
|
175
|
-
{
|
192
|
+
{samplesDescriptor && totalSampleCount > 1 ? (
|
176
193
|
<SampleList
|
177
194
|
listHandle={sampleListHandle}
|
178
195
|
items={items}
|
179
|
-
|
180
|
-
|
196
|
+
totalItemCount={evalSampleCount}
|
197
|
+
running={running}
|
181
198
|
nextSample={nextSample}
|
182
199
|
prevSample={previousSample}
|
183
200
|
showSample={showSample}
|
184
201
|
/>
|
185
202
|
) : undefined}
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
prevSample={previousSample}
|
199
|
-
sampleScrollPositionRef={sampleScrollPositionRef}
|
200
|
-
setSampleScrollPosition={setSampleScrollPosition}
|
201
|
-
/>
|
203
|
+
{showingSampleDialog ? (
|
204
|
+
<SampleDialog
|
205
|
+
id={String(selectedSample?.id || "")}
|
206
|
+
title={title}
|
207
|
+
showingSampleDialog={showingSampleDialog}
|
208
|
+
setShowingSampleDialog={setShowingSampleDialog}
|
209
|
+
selectedTab={selectedSampleTab}
|
210
|
+
setSelectedTab={setSelectedSampleTab}
|
211
|
+
nextSample={nextSample}
|
212
|
+
prevSample={previousSample}
|
213
|
+
/>
|
214
|
+
) : undefined}
|
202
215
|
</Fragment>
|
203
216
|
);
|
204
217
|
}
|
@@ -1,12 +1,16 @@
|
|
1
1
|
import { SampleSummary } from "../../api/types";
|
2
2
|
import { SamplesDescriptor } from "../../samples/descriptor/samplesDescriptor";
|
3
|
+
import { ScoreLabel } from "../../types";
|
4
|
+
import { Epochs } from "../../types/log";
|
3
5
|
import { ListItem, SampleListItem, SeparatorListItem } from "./types";
|
4
6
|
|
5
7
|
export const getSampleProcessor = (
|
6
8
|
samples: SampleSummary[],
|
9
|
+
epochs: Epochs,
|
7
10
|
groupBy: "sample" | "epoch" | "none",
|
8
11
|
groupByOrder: "asc" | "desc",
|
9
12
|
sampleDescriptor: SamplesDescriptor,
|
13
|
+
score?: ScoreLabel,
|
10
14
|
): ((
|
11
15
|
sample: SampleSummary,
|
12
16
|
index: number,
|
@@ -14,11 +18,17 @@ export const getSampleProcessor = (
|
|
14
18
|
) => ListItem[]) => {
|
15
19
|
// Perform grouping if there are epochs
|
16
20
|
if (groupBy == "epoch") {
|
17
|
-
return groupByEpoch(samples, sampleDescriptor, groupByOrder);
|
21
|
+
return groupByEpoch(samples, epochs, sampleDescriptor, groupByOrder, score);
|
18
22
|
} else if (groupBy === "sample") {
|
19
|
-
return groupBySample(
|
23
|
+
return groupBySample(
|
24
|
+
samples,
|
25
|
+
epochs,
|
26
|
+
sampleDescriptor,
|
27
|
+
groupByOrder,
|
28
|
+
score,
|
29
|
+
);
|
20
30
|
} else {
|
21
|
-
return noGrouping(samples, groupByOrder);
|
31
|
+
return noGrouping(samples, groupByOrder, sampleDescriptor, score);
|
22
32
|
}
|
23
33
|
};
|
24
34
|
|
@@ -28,6 +38,8 @@ export const getSampleProcessor = (
|
|
28
38
|
const noGrouping = (
|
29
39
|
samples: SampleSummary[],
|
30
40
|
order: "asc" | "desc",
|
41
|
+
sampleDescriptor: SamplesDescriptor,
|
42
|
+
score?: ScoreLabel,
|
31
43
|
): ((sample: SampleSummary, index: number) => ListItem[]) => {
|
32
44
|
const counter = getCounter(samples.length, 1, order);
|
33
45
|
return (sample: SampleSummary, index: number) => {
|
@@ -40,6 +52,12 @@ const noGrouping = (
|
|
40
52
|
index: index,
|
41
53
|
data: sample,
|
42
54
|
type: "sample",
|
55
|
+
answer:
|
56
|
+
sampleDescriptor.selectedScorerDescriptor(sample)?.answer() || "",
|
57
|
+
scoreRendered: sampleDescriptor.evalDescriptor
|
58
|
+
.score(sample, score)
|
59
|
+
?.render(),
|
60
|
+
completed: sample.completed !== undefined ? sample.completed : true,
|
43
61
|
},
|
44
62
|
];
|
45
63
|
};
|
@@ -50,8 +68,10 @@ const noGrouping = (
|
|
50
68
|
*/
|
51
69
|
const groupBySample = (
|
52
70
|
samples: SampleSummary[],
|
71
|
+
epochs: Epochs,
|
53
72
|
sampleDescriptor: SamplesDescriptor,
|
54
73
|
order: "asc" | "desc",
|
74
|
+
score?: ScoreLabel,
|
55
75
|
): ((
|
56
76
|
sample: SampleSummary,
|
57
77
|
index: number,
|
@@ -73,7 +93,7 @@ const groupBySample = (
|
|
73
93
|
}
|
74
94
|
}
|
75
95
|
});
|
76
|
-
const groupCount = samples.length /
|
96
|
+
const groupCount = samples.length / (epochs || 1);
|
77
97
|
const itemCount = samples.length / groupCount;
|
78
98
|
const counter = getCounter(itemCount, groupCount, order);
|
79
99
|
return (
|
@@ -103,6 +123,11 @@ const groupBySample = (
|
|
103
123
|
index: index,
|
104
124
|
data: sample,
|
105
125
|
type: "sample",
|
126
|
+
answer: sampleDescriptor.selectedScorerDescriptor(sample)?.answer() || "",
|
127
|
+
scoreRendered: sampleDescriptor.evalDescriptor
|
128
|
+
.score(sample, score)
|
129
|
+
?.render(),
|
130
|
+
completed: sample.completed !== undefined ? sample.completed : true,
|
106
131
|
} as SampleListItem);
|
107
132
|
|
108
133
|
return results;
|
@@ -114,14 +139,16 @@ const groupBySample = (
|
|
114
139
|
*/
|
115
140
|
const groupByEpoch = (
|
116
141
|
samples: SampleSummary[],
|
142
|
+
epochs: Epochs,
|
117
143
|
sampleDescriptor: SamplesDescriptor,
|
118
144
|
order: "asc" | "desc",
|
145
|
+
score?: ScoreLabel,
|
119
146
|
): ((
|
120
147
|
sample: SampleSummary,
|
121
148
|
index: number,
|
122
149
|
previousSample?: SampleSummary,
|
123
150
|
) => ListItem[]) => {
|
124
|
-
const groupCount =
|
151
|
+
const groupCount = epochs || 1;
|
125
152
|
const itemCount = samples.length / groupCount;
|
126
153
|
const counter = getCounter(itemCount, groupCount, order);
|
127
154
|
|
@@ -153,6 +180,11 @@ const groupByEpoch = (
|
|
153
180
|
index: index,
|
154
181
|
data: sample,
|
155
182
|
type: "sample",
|
183
|
+
answer: sampleDescriptor.selectedScorerDescriptor(sample)?.answer() || "",
|
184
|
+
scoreRendered: sampleDescriptor.evalDescriptor
|
185
|
+
.score(sample, score)
|
186
|
+
?.render(),
|
187
|
+
completed: sample.completed !== undefined ? sample.completed : true,
|
156
188
|
} as SampleListItem);
|
157
189
|
|
158
190
|
return results;
|
@@ -1,11 +1,15 @@
|
|
1
|
+
import { ReactNode } from "react";
|
1
2
|
import { SampleSummary } from "../../api/types";
|
2
3
|
|
3
4
|
export interface SampleListItem {
|
4
5
|
label: string;
|
5
6
|
index: number;
|
6
7
|
number: number;
|
8
|
+
answer: string;
|
9
|
+
scoreRendered: ReactNode;
|
7
10
|
data: SampleSummary;
|
8
11
|
type: "sample";
|
12
|
+
completed: boolean;
|
9
13
|
}
|
10
14
|
|
11
15
|
export interface SeparatorListItem {
|
@@ -1,10 +1,11 @@
|
|
1
|
-
import { ReactNode, RefObject } from "react";
|
1
|
+
import { ComponentType, ReactNode, RefObject } from "react";
|
2
2
|
|
3
|
-
export interface TabDescriptor {
|
3
|
+
export interface TabDescriptor<P> {
|
4
4
|
id: string;
|
5
5
|
scrollable: boolean;
|
6
6
|
scrollRef?: RefObject<HTMLDivElement | null>;
|
7
7
|
label: string;
|
8
|
-
|
8
|
+
component: ComponentType<P>;
|
9
|
+
componentProps: P;
|
9
10
|
tools?: () => ReactNode[] | undefined;
|
10
11
|
}
|
@@ -1,6 +1,6 @@
|
|
1
|
-
import {
|
1
|
+
import { ResultsMetric } from "./navbar/ResultsPanel";
|
2
2
|
|
3
|
-
export const metricDisplayName = (metric:
|
3
|
+
export const metricDisplayName = (metric: ResultsMetric): string => {
|
4
4
|
let modifier = undefined;
|
5
5
|
for (const metricModifier of metricModifiers) {
|
6
6
|
modifier = metricModifier(metric);
|
@@ -13,10 +13,10 @@ export const metricDisplayName = (metric: EvalMetric): string => {
|
|
13
13
|
return metricName;
|
14
14
|
};
|
15
15
|
|
16
|
-
type MetricModifier = (metric:
|
16
|
+
type MetricModifier = (metric: ResultsMetric) => string | undefined;
|
17
17
|
|
18
18
|
const clusterMetricModifier: MetricModifier = (
|
19
|
-
metric:
|
19
|
+
metric: ResultsMetric,
|
20
20
|
): string | undefined => {
|
21
21
|
if (metric.name !== "stderr") {
|
22
22
|
return undefined;
|
@@ -24,4 +24,10 @@ export default defineConfig({
|
|
24
24
|
resolve: {
|
25
25
|
dedupe: ["react", "react-dom"],
|
26
26
|
},
|
27
|
+
define: {
|
28
|
+
__DEV_WATCH__: JSON.stringify(process.env.DEV_LOGGING === "true"),
|
29
|
+
__LOGGING_FILTER__: JSON.stringify(
|
30
|
+
process.env.DEV_LOGGING_NAMESPACES || "*",
|
31
|
+
),
|
32
|
+
},
|
27
33
|
});
|