inspect-ai 0.3.81__py3-none-any.whl → 0.3.83__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/__init__.py +2 -1
- inspect_ai/_cli/eval.py +35 -2
- inspect_ai/_cli/util.py +44 -1
- inspect_ai/_display/core/config.py +1 -1
- inspect_ai/_display/core/display.py +13 -4
- inspect_ai/_display/core/results.py +1 -1
- inspect_ai/_display/textual/app.py +14 -3
- inspect_ai/_display/textual/display.py +4 -0
- inspect_ai/_display/textual/widgets/samples.py +9 -3
- inspect_ai/_display/textual/widgets/task_detail.py +8 -8
- inspect_ai/_display/textual/widgets/tasks.py +17 -1
- inspect_ai/_display/textual/widgets/vscode.py +44 -0
- inspect_ai/_eval/eval.py +74 -25
- inspect_ai/_eval/evalset.py +22 -18
- inspect_ai/_eval/loader.py +34 -11
- inspect_ai/_eval/run.py +13 -15
- inspect_ai/_eval/score.py +13 -3
- inspect_ai/_eval/task/generate.py +8 -9
- inspect_ai/_eval/task/log.py +55 -6
- inspect_ai/_eval/task/run.py +51 -10
- inspect_ai/_eval/task/task.py +23 -9
- inspect_ai/_util/constants.py +2 -0
- inspect_ai/_util/file.py +30 -1
- inspect_ai/_util/json.py +37 -1
- inspect_ai/_util/registry.py +1 -0
- inspect_ai/_util/vscode.py +37 -0
- inspect_ai/_view/server.py +113 -1
- inspect_ai/_view/www/App.css +7 -1
- inspect_ai/_view/www/dist/assets/index.css +813 -415
- inspect_ai/_view/www/dist/assets/index.js +54475 -32003
- inspect_ai/_view/www/eslint.config.mjs +1 -1
- inspect_ai/_view/www/log-schema.json +137 -31
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
- inspect_ai/_view/www/package.json +11 -2
- inspect_ai/_view/www/src/App.tsx +161 -853
- inspect_ai/_view/www/src/api/api-browser.ts +176 -5
- inspect_ai/_view/www/src/api/api-vscode.ts +75 -1
- inspect_ai/_view/www/src/api/client-api.ts +66 -10
- inspect_ai/_view/www/src/api/jsonrpc.ts +2 -0
- inspect_ai/_view/www/src/api/types.ts +107 -2
- inspect_ai/_view/www/src/appearance/icons.ts +2 -0
- inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +3 -3
- inspect_ai/_view/www/src/components/Card.tsx +6 -4
- inspect_ai/_view/www/src/components/DownloadPanel.tsx +2 -2
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +56 -61
- inspect_ai/_view/www/src/components/FindBand.tsx +17 -9
- inspect_ai/_view/www/src/components/HumanBaselineView.tsx +1 -1
- inspect_ai/_view/www/src/components/JsonPanel.tsx +14 -24
- inspect_ai/_view/www/src/components/LargeModal.tsx +2 -35
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +27 -11
- inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
- inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
- inspect_ai/_view/www/src/components/LiveVirtualList.module.css +11 -0
- inspect_ai/_view/www/src/components/LiveVirtualList.tsx +177 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +116 -26
- inspect_ai/_view/www/src/components/MessageBand.tsx +14 -9
- inspect_ai/_view/www/src/components/Modal.module.css +38 -0
- inspect_ai/_view/www/src/components/Modal.tsx +77 -0
- inspect_ai/_view/www/src/components/MorePopOver.tsx +3 -3
- inspect_ai/_view/www/src/components/NavPills.tsx +20 -8
- inspect_ai/_view/www/src/components/NoContentsPanel.module.css +12 -0
- inspect_ai/_view/www/src/components/NoContentsPanel.tsx +20 -0
- inspect_ai/_view/www/src/components/ProgressBar.module.css +5 -4
- inspect_ai/_view/www/src/components/ProgressBar.tsx +3 -2
- inspect_ai/_view/www/src/components/PulsingDots.module.css +81 -0
- inspect_ai/_view/www/src/components/PulsingDots.tsx +45 -0
- inspect_ai/_view/www/src/components/TabSet.tsx +4 -37
- inspect_ai/_view/www/src/components/ToolButton.tsx +3 -4
- inspect_ai/_view/www/src/index.tsx +26 -94
- inspect_ai/_view/www/src/logfile/remoteLogFile.ts +9 -1
- inspect_ai/_view/www/src/logfile/remoteZipFile.ts +30 -4
- inspect_ai/_view/www/src/metadata/RenderedContent.tsx +4 -6
- inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
- inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
- inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +1 -1
- inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +9 -1
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +74 -28
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +58 -22
- inspect_ai/_view/www/src/samples/SampleDisplay.module.css +4 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +135 -104
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +10 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +83 -36
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +35 -30
- inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +1 -1
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +45 -53
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +6 -1
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +5 -0
- inspect_ai/_view/www/src/samples/chat/messages.ts +36 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +3 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +11 -1
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +22 -46
- inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +34 -20
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -10
- inspect_ai/_view/www/src/samples/descriptor/types.ts +6 -5
- inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +22 -3
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +27 -2
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +122 -85
- inspect_ai/_view/www/src/samples/list/SampleRow.module.css +6 -0
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +28 -15
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +29 -18
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +28 -28
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +19 -9
- inspect_ai/_view/www/src/samples/sampleDataAdapter.ts +33 -0
- inspect_ai/_view/www/src/samples/sampleLimit.ts +2 -2
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +12 -27
- inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
- inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
- inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
- inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +4 -0
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +10 -24
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -22
- inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +15 -24
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +6 -28
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +24 -34
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +4 -0
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +33 -17
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +197 -338
- inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.module.css +16 -0
- inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.tsx +44 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +7 -4
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +81 -60
- inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.module.css +23 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.tsx +27 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +29 -1
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +102 -72
- inspect_ai/_view/www/src/scoring/utils.ts +87 -0
- inspect_ai/_view/www/src/state/appSlice.ts +244 -0
- inspect_ai/_view/www/src/state/hooks.ts +399 -0
- inspect_ai/_view/www/src/state/logPolling.ts +200 -0
- inspect_ai/_view/www/src/state/logSlice.ts +224 -0
- inspect_ai/_view/www/src/state/logsPolling.ts +118 -0
- inspect_ai/_view/www/src/state/logsSlice.ts +181 -0
- inspect_ai/_view/www/src/state/samplePolling.ts +314 -0
- inspect_ai/_view/www/src/state/sampleSlice.ts +140 -0
- inspect_ai/_view/www/src/state/sampleUtils.ts +21 -0
- inspect_ai/_view/www/src/state/scrolling.ts +206 -0
- inspect_ai/_view/www/src/state/store.ts +168 -0
- inspect_ai/_view/www/src/state/store_filter.ts +84 -0
- inspect_ai/_view/www/src/state/utils.ts +23 -0
- inspect_ai/_view/www/src/storage/index.ts +26 -0
- inspect_ai/_view/www/src/types/log.d.ts +36 -26
- inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
- inspect_ai/_view/www/src/types.ts +94 -32
- inspect_ai/_view/www/src/utils/attachments.ts +58 -23
- inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
- inspect_ai/_view/www/src/utils/logger.ts +52 -0
- inspect_ai/_view/www/src/utils/polling.ts +100 -0
- inspect_ai/_view/www/src/utils/react.ts +30 -0
- inspect_ai/_view/www/src/utils/vscode.ts +1 -1
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +184 -217
- inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +11 -53
- inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +8 -18
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -0
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +40 -22
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -1
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +159 -103
- inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.module.css +32 -0
- inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.tsx +32 -0
- inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
- inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +12 -14
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +6 -2
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +4 -4
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +28 -13
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +5 -10
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +4 -4
- inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.module.css +22 -0
- inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.tsx +19 -0
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +128 -115
- inspect_ai/_view/www/src/workspace/tabs/grouping.ts +37 -5
- inspect_ai/_view/www/src/workspace/tabs/types.ts +4 -0
- inspect_ai/_view/www/src/workspace/types.ts +4 -3
- inspect_ai/_view/www/src/workspace/utils.ts +4 -4
- inspect_ai/_view/www/vite.config.js +6 -0
- inspect_ai/_view/www/yarn.lock +464 -355
- inspect_ai/agent/__init__.py +36 -0
- inspect_ai/agent/_agent.py +268 -0
- inspect_ai/agent/_as_solver.py +72 -0
- inspect_ai/agent/_as_tool.py +122 -0
- inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
- inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
- inspect_ai/agent/_filter.py +46 -0
- inspect_ai/agent/_handoff.py +93 -0
- inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
- inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
- inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
- inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
- inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
- inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
- inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
- inspect_ai/agent/_react.py +241 -0
- inspect_ai/agent/_run.py +36 -0
- inspect_ai/agent/_types.py +81 -0
- inspect_ai/log/_condense.py +26 -0
- inspect_ai/log/_log.py +17 -5
- inspect_ai/log/_recorders/buffer/__init__.py +14 -0
- inspect_ai/log/_recorders/buffer/buffer.py +30 -0
- inspect_ai/log/_recorders/buffer/database.py +685 -0
- inspect_ai/log/_recorders/buffer/filestore.py +259 -0
- inspect_ai/log/_recorders/buffer/types.py +84 -0
- inspect_ai/log/_recorders/eval.py +2 -11
- inspect_ai/log/_recorders/types.py +30 -0
- inspect_ai/log/_transcript.py +32 -2
- inspect_ai/model/__init__.py +7 -1
- inspect_ai/model/_call_tools.py +257 -52
- inspect_ai/model/_chat_message.py +7 -4
- inspect_ai/model/_conversation.py +13 -62
- inspect_ai/model/_display.py +85 -0
- inspect_ai/model/_generate_config.py +2 -2
- inspect_ai/model/_model.py +114 -14
- inspect_ai/model/_model_output.py +14 -9
- inspect_ai/model/_openai.py +16 -4
- inspect_ai/model/_openai_computer_use.py +162 -0
- inspect_ai/model/_openai_responses.py +319 -165
- inspect_ai/model/_providers/anthropic.py +20 -21
- inspect_ai/model/_providers/azureai.py +24 -13
- inspect_ai/model/_providers/bedrock.py +1 -7
- inspect_ai/model/_providers/cloudflare.py +3 -3
- inspect_ai/model/_providers/goodfire.py +2 -6
- inspect_ai/model/_providers/google.py +11 -10
- inspect_ai/model/_providers/groq.py +6 -3
- inspect_ai/model/_providers/hf.py +7 -3
- inspect_ai/model/_providers/mistral.py +7 -10
- inspect_ai/model/_providers/openai.py +47 -17
- inspect_ai/model/_providers/openai_o1.py +11 -4
- inspect_ai/model/_providers/openai_responses.py +12 -14
- inspect_ai/model/_providers/providers.py +2 -2
- inspect_ai/model/_providers/together.py +12 -2
- inspect_ai/model/_providers/util/chatapi.py +7 -2
- inspect_ai/model/_providers/util/hf_handler.py +4 -2
- inspect_ai/model/_providers/util/llama31.py +4 -2
- inspect_ai/model/_providers/vertex.py +11 -9
- inspect_ai/model/_providers/vllm.py +4 -4
- inspect_ai/scorer/__init__.py +2 -0
- inspect_ai/scorer/_metrics/__init__.py +2 -0
- inspect_ai/scorer/_metrics/grouped.py +84 -0
- inspect_ai/scorer/_score.py +26 -6
- inspect_ai/solver/__init__.py +2 -2
- inspect_ai/solver/_basic_agent.py +22 -9
- inspect_ai/solver/_bridge.py +31 -0
- inspect_ai/solver/_chain.py +20 -12
- inspect_ai/solver/_fork.py +5 -1
- inspect_ai/solver/_human_agent.py +52 -0
- inspect_ai/solver/_prompt.py +3 -1
- inspect_ai/solver/_run.py +59 -0
- inspect_ai/solver/_solver.py +14 -4
- inspect_ai/solver/_task_state.py +5 -3
- inspect_ai/tool/_tool_call.py +15 -8
- inspect_ai/tool/_tool_def.py +17 -12
- inspect_ai/tool/_tool_support_helpers.py +4 -4
- inspect_ai/tool/_tool_with.py +14 -11
- inspect_ai/tool/_tools/_bash_session.py +11 -2
- inspect_ai/tool/_tools/_computer/_common.py +18 -2
- inspect_ai/tool/_tools/_computer/_computer.py +18 -2
- inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
- inspect_ai/tool/_tools/_think.py +1 -1
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +103 -62
- inspect_ai/util/__init__.py +2 -0
- inspect_ai/util/_anyio.py +27 -0
- inspect_ai/util/_sandbox/__init__.py +2 -1
- inspect_ai/util/_sandbox/context.py +32 -7
- inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
- inspect_ai/util/_sandbox/docker/compose.py +2 -2
- inspect_ai/util/_sandbox/docker/docker.py +12 -1
- inspect_ai/util/_store_model.py +30 -7
- inspect_ai/util/_subprocess.py +13 -3
- inspect_ai/util/_subtask.py +1 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/RECORD +295 -229
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -169
- inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +0 -22
- /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/top_level.txt +0 -0
inspect_ai/_view/www/src/App.tsx
CHANGED
@@ -12,683 +12,133 @@ import "prismjs/themes/prism.css";
|
|
12
12
|
|
13
13
|
import "../App.css";
|
14
14
|
|
15
|
-
import { AppErrorBoundary } from "./AppErrorBoundary";
|
16
15
|
import { ErrorPanel } from "./components/ErrorPanel";
|
17
16
|
import { ProgressBar } from "./components/ProgressBar";
|
18
|
-
import { clearDocumentSelection } from "./utils/browser";
|
19
|
-
import { debounce, sleep } from "./utils/sync";
|
20
17
|
|
21
18
|
import { FindBand } from "./components/FindBand";
|
22
|
-
import { kDefaultSort } from "./constants";
|
23
|
-
import {
|
24
|
-
createEvalDescriptor,
|
25
|
-
createSamplesDescriptor,
|
26
|
-
} from "./samples/descriptor/samplesDescriptor";
|
27
|
-
import { filterSamples } from "./samples/sample-tools/filters";
|
28
|
-
import {
|
29
|
-
byEpoch,
|
30
|
-
bySample,
|
31
|
-
sortSamples,
|
32
|
-
} from "./samples/sample-tools/SortFilter";
|
33
|
-
import { resolveAttachments } from "./utils/attachments";
|
34
|
-
import { getVscodeApi } from "./utils/vscode";
|
35
19
|
import { Sidebar } from "./workspace/sidebar/Sidebar.tsx";
|
36
20
|
import { WorkSpace } from "./workspace/WorkSpace";
|
37
21
|
|
38
22
|
import ClipboardJS from "clipboard";
|
39
23
|
import clsx from "clsx";
|
40
|
-
import { FC, useCallback, useEffect,
|
41
|
-
import {
|
42
|
-
|
43
|
-
|
44
|
-
EvalSummary,
|
45
|
-
HostMessage,
|
46
|
-
LogFiles,
|
47
|
-
SampleSummary,
|
48
|
-
} from "./api/types.ts";
|
49
|
-
import {
|
50
|
-
kEvalWorkspaceTabId,
|
51
|
-
kInfoWorkspaceTabId,
|
52
|
-
kSampleMessagesTabId,
|
53
|
-
kSampleTranscriptTabId,
|
54
|
-
} from "./constants";
|
55
|
-
import {
|
56
|
-
ApplicationState,
|
57
|
-
AppStatus,
|
58
|
-
Capabilities,
|
59
|
-
CurrentLog,
|
60
|
-
ScoreFilter,
|
61
|
-
ScoreLabel,
|
62
|
-
} from "./types.ts";
|
63
|
-
import { EvalSample } from "./types/log";
|
24
|
+
import { FC, KeyboardEvent, useCallback, useEffect, useRef } from "react";
|
25
|
+
import { ClientAPI, HostMessage } from "./api/types.ts";
|
26
|
+
import { useSetSelectedLogIndex } from "./state/hooks.ts";
|
27
|
+
import { useStore } from "./state/store.ts";
|
64
28
|
|
65
29
|
interface AppProps {
|
66
30
|
api: ClientAPI;
|
67
|
-
applicationState?: ApplicationState;
|
68
|
-
saveApplicationState?: (state: ApplicationState) => void;
|
69
|
-
pollForLogs: boolean;
|
70
|
-
capabilities: Capabilities;
|
71
31
|
}
|
72
32
|
|
73
33
|
/**
|
74
34
|
* Renders the Main Application
|
75
35
|
*/
|
76
|
-
export const App: FC<AppProps> = ({
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
const [logs, setLogs] = useState<LogFiles>(
|
85
|
-
applicationState?.logs || { log_dir: "", files: [] },
|
36
|
+
export const App: FC<AppProps> = ({ api }) => {
|
37
|
+
// App layout and state
|
38
|
+
const appStatus = useStore((state) => state.app.status);
|
39
|
+
const setAppStatus = useStore((state) => state.appActions.setStatus);
|
40
|
+
const offCanvas = useStore((state) => state.app.offcanvas);
|
41
|
+
const setOffCanvas = useStore((state) => state.appActions.setOffcanvas);
|
42
|
+
const clearWorkspaceTab = useStore(
|
43
|
+
(state) => state.appActions.clearWorkspaceTab,
|
86
44
|
);
|
87
|
-
const
|
88
|
-
|
89
|
-
|
90
|
-
|
45
|
+
const clearSampleTab = useStore((state) => state.appActions.clearSampleTab);
|
46
|
+
|
47
|
+
// Find
|
48
|
+
const nativeFind = useStore((state) => state.capabilities.nativeFind);
|
49
|
+
const showFind = useStore((state) => state.app.showFind);
|
50
|
+
const setShowFind = useStore((state) => state.appActions.setShowFind);
|
51
|
+
const hideFind = useStore((state) => state.appActions.hideFind);
|
52
|
+
|
53
|
+
// Logs Data
|
54
|
+
const logs = useStore((state) => state.logs.logs);
|
55
|
+
const selectedLogIndex = useStore((state) => state.logs.selectedLogIndex);
|
56
|
+
const logHeaders = useStore((state) => state.logs.logHeaders);
|
57
|
+
const headersLoading = useStore((state) => state.logs.headersLoading);
|
58
|
+
const setLogs = useStore((state) => state.logsActions.setLogs);
|
59
|
+
const selectedLogFile = useStore((state) =>
|
60
|
+
state.logsActions.getSelectedLogFile(),
|
91
61
|
);
|
62
|
+
const setSelectedLogIndex = useSetSelectedLogIndex();
|
92
63
|
|
93
|
-
|
94
|
-
const
|
95
|
-
applicationState?.logHeaders || {},
|
96
|
-
);
|
97
|
-
const [headersLoading, setHeadersLoading] = useState<boolean>(
|
98
|
-
applicationState?.headersLoading || false,
|
99
|
-
);
|
100
|
-
|
101
|
-
const [selectedLog, setSelectedLog] = useState<CurrentLog | undefined>(
|
102
|
-
applicationState?.selectedLog,
|
103
|
-
);
|
104
|
-
|
105
|
-
// Workspace (the selected tab)
|
106
|
-
const [selectedWorkspaceTab, setSelectedWorkspaceTab] = useState<string>(
|
107
|
-
applicationState?.selectedWorkspaceTab || kEvalWorkspaceTabId,
|
108
|
-
);
|
109
|
-
const [selectedSampleIndex, setSelectedSampleIndex] = useState<number>(
|
110
|
-
applicationState?.selectedSampleIndex !== undefined
|
111
|
-
? applicationState.selectedSampleIndex
|
112
|
-
: -1,
|
113
|
-
);
|
114
|
-
const [selectedSample, setSelectedSample] = useState<EvalSample | undefined>(
|
115
|
-
applicationState?.selectedSample,
|
116
|
-
);
|
117
|
-
const [sampleStatus, setSampleStatus] = useState<"loading" | "ok" | "error">(
|
118
|
-
applicationState?.sampleStatus || "loading",
|
119
|
-
);
|
120
|
-
const [sampleError, setSampleError] = useState<Error | undefined>(
|
121
|
-
applicationState?.sampleError,
|
122
|
-
);
|
123
|
-
const [selectedSampleTab, setSelectedSampleTab] = useState<
|
124
|
-
string | undefined
|
125
|
-
>(applicationState?.selectedSampleTab);
|
126
|
-
const sampleScrollPosition = useRef<number>(
|
127
|
-
applicationState?.sampleScrollPosition || 0,
|
128
|
-
);
|
129
|
-
const loadingSampleIndexRef = useRef<number | null>(null);
|
130
|
-
const workspaceTabScrollPosition = useRef<Record<string, number>>(
|
131
|
-
applicationState?.workspaceTabScrollPosition || {},
|
132
|
-
);
|
133
|
-
|
134
|
-
const [showingSampleDialog, setShowingSampleDialog] = useState<boolean>(
|
135
|
-
!!applicationState?.showingSampleDialog,
|
136
|
-
);
|
137
|
-
|
138
|
-
// App loading status
|
139
|
-
const [status, setStatus] = useState<AppStatus>(
|
140
|
-
applicationState?.status || { loading: false },
|
141
|
-
);
|
142
|
-
|
143
|
-
// Other application state
|
144
|
-
const [offcanvas, setOffcanvas] = useState<boolean>(
|
145
|
-
applicationState?.offcanvas || false,
|
146
|
-
);
|
147
|
-
const [showFind, setShowFind] = useState<boolean>(
|
148
|
-
applicationState?.showFind || false,
|
149
|
-
);
|
150
|
-
|
151
|
-
// Filtering and sorting
|
152
|
-
const [filter, setFilter] = useState<ScoreFilter>(
|
153
|
-
applicationState?.filter || {},
|
154
|
-
);
|
155
|
-
|
156
|
-
const [epoch, setEpoch] = useState<string>(applicationState?.epoch || "all");
|
157
|
-
const [sort, setSort] = useState<string>(
|
158
|
-
applicationState?.sort || kDefaultSort,
|
159
|
-
);
|
160
|
-
|
161
|
-
const [scores, setScores] = useState<ScoreLabel[]>(
|
162
|
-
applicationState?.scores || [],
|
163
|
-
);
|
164
|
-
|
165
|
-
const [score, setScore] = useState<ScoreLabel | undefined>(
|
166
|
-
applicationState?.score,
|
167
|
-
);
|
168
|
-
|
169
|
-
// Re-filter the samples
|
170
|
-
const [filteredSamples, setFilteredSamples] = useState<SampleSummary[]>(
|
171
|
-
applicationState?.filteredSamples || [],
|
172
|
-
);
|
173
|
-
const [groupBy, setGroupBy] = useState<"none" | "epoch" | "sample">(
|
174
|
-
applicationState?.groupBy || "none",
|
175
|
-
);
|
176
|
-
const [groupByOrder, setGroupByOrder] = useState<"asc" | "desc">(
|
177
|
-
applicationState?.groupByOrder || "asc",
|
178
|
-
);
|
179
|
-
|
180
|
-
const saveState = useCallback(() => {
|
181
|
-
const state = {
|
182
|
-
logs,
|
183
|
-
selectedLogIndex,
|
184
|
-
logHeaders,
|
185
|
-
headersLoading,
|
186
|
-
selectedLog,
|
187
|
-
selectedSampleIndex,
|
188
|
-
selectedWorkspaceTab,
|
189
|
-
selectedSample,
|
190
|
-
sampleStatus,
|
191
|
-
sampleError,
|
192
|
-
selectedSampleTab,
|
193
|
-
showingSampleDialog,
|
194
|
-
status,
|
195
|
-
offcanvas,
|
196
|
-
showFind,
|
197
|
-
filter,
|
198
|
-
epoch,
|
199
|
-
sort,
|
200
|
-
scores,
|
201
|
-
score,
|
202
|
-
filteredSamples,
|
203
|
-
groupBy,
|
204
|
-
groupByOrder,
|
205
|
-
sampleScrollPosition: sampleScrollPosition.current,
|
206
|
-
workspaceTabScrollPosition: workspaceTabScrollPosition.current,
|
207
|
-
};
|
208
|
-
if (saveApplicationState) {
|
209
|
-
saveApplicationState(state);
|
210
|
-
}
|
211
|
-
}, [
|
212
|
-
logs,
|
213
|
-
selectedLogIndex,
|
214
|
-
logHeaders,
|
215
|
-
headersLoading,
|
216
|
-
selectedLog,
|
217
|
-
selectedSampleIndex,
|
218
|
-
selectedWorkspaceTab,
|
219
|
-
selectedSample,
|
220
|
-
sampleStatus,
|
221
|
-
sampleError,
|
222
|
-
selectedSampleTab,
|
223
|
-
showingSampleDialog,
|
224
|
-
status,
|
225
|
-
offcanvas,
|
226
|
-
showFind,
|
227
|
-
filter,
|
228
|
-
epoch,
|
229
|
-
sort,
|
230
|
-
scores,
|
231
|
-
score,
|
232
|
-
filteredSamples,
|
233
|
-
groupBy,
|
234
|
-
groupByOrder,
|
235
|
-
]);
|
236
|
-
|
237
|
-
const saveStateRef = useRef(saveState);
|
238
|
-
// Update the ref whenever saveState changes
|
239
|
-
useEffect(() => {
|
240
|
-
saveStateRef.current = saveState;
|
241
|
-
}, [saveState]);
|
242
|
-
|
243
|
-
const setSampleScrollPosition = useCallback(
|
244
|
-
debounce((position) => {
|
245
|
-
sampleScrollPosition.current = position;
|
246
|
-
saveStateRef.current();
|
247
|
-
}, 1000),
|
248
|
-
[],
|
249
|
-
);
|
250
|
-
|
251
|
-
const setWorkspaceTabScrollPosition = useCallback(
|
252
|
-
debounce((tab, position) => {
|
253
|
-
if (workspaceTabScrollPosition.current[tab] !== position) {
|
254
|
-
workspaceTabScrollPosition.current = {
|
255
|
-
...workspaceTabScrollPosition.current,
|
256
|
-
[tab]: position,
|
257
|
-
};
|
258
|
-
saveStateRef.current();
|
259
|
-
}
|
260
|
-
}, 1000),
|
261
|
-
[],
|
262
|
-
);
|
64
|
+
const refreshLogs = useStore((state) => state.logsActions.refreshLogs);
|
65
|
+
const selectLogFile = useStore((state) => state.logsActions.selectLogFile);
|
263
66
|
|
264
|
-
//
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
logs,
|
270
|
-
selectedLogIndex,
|
271
|
-
logHeaders,
|
272
|
-
headersLoading,
|
273
|
-
selectedLog,
|
274
|
-
selectedSampleIndex,
|
275
|
-
selectedWorkspaceTab,
|
276
|
-
selectedSample,
|
277
|
-
sampleStatus,
|
278
|
-
sampleError,
|
279
|
-
selectedSampleTab,
|
280
|
-
showingSampleDialog,
|
281
|
-
status,
|
282
|
-
offcanvas,
|
283
|
-
showFind,
|
284
|
-
filter,
|
285
|
-
epoch,
|
286
|
-
sort,
|
287
|
-
scores,
|
288
|
-
score,
|
289
|
-
filteredSamples,
|
290
|
-
groupBy,
|
291
|
-
groupByOrder,
|
292
|
-
]);
|
293
|
-
|
294
|
-
const handleSampleShowingDialog = useCallback(
|
295
|
-
(show: boolean) => {
|
296
|
-
setShowingSampleDialog(show);
|
297
|
-
if (!show) {
|
298
|
-
setSelectedSample(undefined);
|
299
|
-
setSelectedSampleTab(undefined);
|
300
|
-
}
|
301
|
-
},
|
302
|
-
[
|
303
|
-
setShowingSampleDialog,
|
304
|
-
setSelectedSample,
|
305
|
-
setSelectedSampleTab,
|
306
|
-
selectedSample,
|
307
|
-
],
|
67
|
+
// Log Data
|
68
|
+
const selectedLogSummary = useStore((state) => state.log.selectedLogSummary);
|
69
|
+
const loadedLogFile = useStore((state) => state.log.loadedLog);
|
70
|
+
const runningMetrics = useStore(
|
71
|
+
(state) => state.log.pendingSampleSummaries?.metrics,
|
308
72
|
);
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
? filterSamples(evalDescriptor, samples, filter.value)
|
315
|
-
: { result: samples };
|
316
|
-
|
317
|
-
const filtered = prefiltered.filter((sample) => {
|
318
|
-
// Filter by epoch if specified
|
319
|
-
if (epoch && epoch !== "all") {
|
320
|
-
if (epoch !== String(sample.epoch)) {
|
321
|
-
return false;
|
322
|
-
}
|
323
|
-
}
|
324
|
-
return true;
|
325
|
-
});
|
326
|
-
|
327
|
-
// Sort the samples
|
328
|
-
if (samplesDescriptor) {
|
329
|
-
const { sorted, order } = sortSamples(sort, filtered, samplesDescriptor);
|
330
|
-
setFilteredSamples(sorted);
|
331
|
-
setGroupByOrder(order);
|
332
|
-
}
|
333
|
-
|
334
|
-
// Set the grouping
|
335
|
-
let grouping: "none" | "epoch" | "sample" = "none";
|
336
|
-
if (
|
337
|
-
samplesDescriptor?.evalDescriptor?.epochs &&
|
338
|
-
samplesDescriptor.evalDescriptor.epochs > 1
|
339
|
-
) {
|
340
|
-
if (byEpoch(sort) || epoch !== "all") {
|
341
|
-
grouping = "epoch";
|
342
|
-
} else if (bySample(sort)) {
|
343
|
-
grouping = "sample";
|
344
|
-
}
|
345
|
-
}
|
346
|
-
setGroupBy(grouping);
|
347
|
-
}, [selectedLog, filter, sort, epoch]);
|
348
|
-
|
349
|
-
const evalDescriptor = useMemo(() => {
|
350
|
-
return createEvalDescriptor(
|
351
|
-
scores,
|
352
|
-
selectedLog?.contents?.eval?.config?.epochs || 1,
|
353
|
-
selectedLog?.contents?.sampleSummaries,
|
354
|
-
);
|
355
|
-
}, [selectedLog, scores]);
|
356
|
-
|
357
|
-
const samplesDescriptor = useMemo(() => {
|
358
|
-
return evalDescriptor && score
|
359
|
-
? createSamplesDescriptor(evalDescriptor, score)
|
360
|
-
: undefined;
|
361
|
-
}, [evalDescriptor, score]);
|
362
|
-
|
363
|
-
useEffect(() => {
|
364
|
-
if (selectedSampleTab === undefined && selectedSample) {
|
365
|
-
setSelectedSampleTab(
|
366
|
-
selectedSample.events && selectedSample.events.length > 0
|
367
|
-
? kSampleTranscriptTabId
|
368
|
-
: kSampleMessagesTabId,
|
369
|
-
);
|
370
|
-
}
|
371
|
-
}, [selectedSample, selectedSampleTab]);
|
73
|
+
const resetFiltering = useStore((state) => state.logActions.resetFiltering);
|
74
|
+
const loadLog = useStore((state) => state.logActions.loadLog);
|
75
|
+
const pollLog = useStore((state) => state.logActions.pollLog);
|
76
|
+
const refreshLog = useStore((state) => state.logActions.refreshLog);
|
77
|
+
const selectSample = useStore((state) => state.logActions.selectSample);
|
372
78
|
|
373
79
|
// The main application reference
|
374
80
|
const mainAppRef = useRef<HTMLDivElement>(null);
|
375
81
|
|
376
|
-
// Loads a sample
|
377
|
-
useEffect(() => {
|
378
|
-
// Clear the selected sample
|
379
|
-
if (!selectedLog || selectedSampleIndex === -1) {
|
380
|
-
setSelectedSample(undefined);
|
381
|
-
return;
|
382
|
-
}
|
383
|
-
|
384
|
-
// If already loading the selected sample, do nothing
|
385
|
-
if (loadingSampleIndexRef.current === selectedSampleIndex) {
|
386
|
-
return;
|
387
|
-
}
|
388
|
-
|
389
|
-
if (
|
390
|
-
!showingSampleDialog &&
|
391
|
-
selectedLog.contents.sampleSummaries.length > 1
|
392
|
-
) {
|
393
|
-
return;
|
394
|
-
}
|
395
|
-
|
396
|
-
if (selectedSampleIndex < filteredSamples.length) {
|
397
|
-
const summary = filteredSamples[selectedSampleIndex];
|
398
|
-
// If this sample is already loaded, don't bother
|
399
|
-
if (
|
400
|
-
selectedSample &&
|
401
|
-
selectedSample.id === summary.id &&
|
402
|
-
selectedSample.epoch === summary.epoch
|
403
|
-
) {
|
404
|
-
return;
|
405
|
-
}
|
406
|
-
|
407
|
-
// Load the selected sample (if not already loaded)
|
408
|
-
loadingSampleIndexRef.current = selectedSampleIndex;
|
409
|
-
setSampleStatus("loading");
|
410
|
-
setSampleError(undefined);
|
411
|
-
|
412
|
-
api
|
413
|
-
.get_log_sample(selectedLog.name, summary.id, summary.epoch)
|
414
|
-
.then((sample) => {
|
415
|
-
if (sample) {
|
416
|
-
// This migrates old samples (with raw transcript element)
|
417
|
-
// to the new structure (hence the type bypass).
|
418
|
-
const anySample = sample as any;
|
419
|
-
if (anySample.transcript) {
|
420
|
-
sample.events = anySample.transcript.events;
|
421
|
-
sample.attachments = anySample.transcript.content;
|
422
|
-
}
|
423
|
-
sample.attachments = sample.attachments || {};
|
424
|
-
sample.input = resolveAttachments(sample.input, sample.attachments);
|
425
|
-
sample.messages = resolveAttachments(
|
426
|
-
sample.messages,
|
427
|
-
sample.attachments,
|
428
|
-
);
|
429
|
-
sample.events = resolveAttachments(
|
430
|
-
sample.events,
|
431
|
-
sample.attachments,
|
432
|
-
);
|
433
|
-
sample.attachments = {};
|
434
|
-
|
435
|
-
sampleScrollPosition.current = 0;
|
436
|
-
setSelectedSample(sample);
|
437
|
-
|
438
|
-
setSampleStatus("ok");
|
439
|
-
loadingSampleIndexRef.current = null;
|
440
|
-
} else {
|
441
|
-
throw Error("Unable to load sample - an unknown error occurred.");
|
442
|
-
}
|
443
|
-
})
|
444
|
-
.catch((e) => {
|
445
|
-
setSampleStatus("error");
|
446
|
-
setSampleError(e);
|
447
|
-
|
448
|
-
sampleScrollPosition.current = 0;
|
449
|
-
setSelectedSample(undefined);
|
450
|
-
|
451
|
-
loadingSampleIndexRef.current = null;
|
452
|
-
});
|
453
|
-
}
|
454
|
-
}, [
|
455
|
-
selectedSample,
|
456
|
-
selectedSampleIndex,
|
457
|
-
showingSampleDialog,
|
458
|
-
selectedLog,
|
459
|
-
filteredSamples,
|
460
|
-
setSelectedSample,
|
461
|
-
setSampleStatus,
|
462
|
-
setSampleError,
|
463
|
-
]);
|
464
|
-
|
465
|
-
// Read header information for the logs
|
466
|
-
// and then update
|
467
|
-
useEffect(() => {
|
468
|
-
const loadHeaders = async () => {
|
469
|
-
setHeadersLoading(true);
|
470
|
-
|
471
|
-
// Group into chunks
|
472
|
-
const chunkSize = 8;
|
473
|
-
const fileLists = [];
|
474
|
-
for (let i = 0; i < logs.files.length; i += chunkSize) {
|
475
|
-
let chunk = logs.files.slice(i, i + chunkSize).map((log) => log.name);
|
476
|
-
fileLists.push(chunk);
|
477
|
-
}
|
478
|
-
|
479
|
-
// Chunk by chunk, read the header information
|
480
|
-
try {
|
481
|
-
for (const fileList of fileLists) {
|
482
|
-
const headers = await api.get_log_headers(fileList);
|
483
|
-
setLogHeaders((prev) => {
|
484
|
-
const updatedHeaders: Record<string, EvalLogHeader> = {};
|
485
|
-
headers.forEach((header, index) => {
|
486
|
-
const logFile = fileList[index];
|
487
|
-
updatedHeaders[logFile] = header as EvalLogHeader;
|
488
|
-
});
|
489
|
-
return { ...prev, ...updatedHeaders };
|
490
|
-
});
|
491
|
-
|
492
|
-
if (headers.length === chunkSize) {
|
493
|
-
await sleep(5000); // Pause between chunks
|
494
|
-
}
|
495
|
-
}
|
496
|
-
} catch (e: unknown) {
|
497
|
-
if (
|
498
|
-
e instanceof Error &&
|
499
|
-
(e.message === "Load failed" || e.message === "Failed to fetch")
|
500
|
-
) {
|
501
|
-
// This will happen if the server disappears (e.g. inspect view is terminated)
|
502
|
-
setStatus({ loading: false });
|
503
|
-
} else {
|
504
|
-
console.log(e);
|
505
|
-
setStatus({ loading: false, error: e as Error });
|
506
|
-
}
|
507
|
-
}
|
508
|
-
setHeadersLoading(false);
|
509
|
-
};
|
510
|
-
|
511
|
-
loadHeaders();
|
512
|
-
}, [logs, setStatus, setLogHeaders, setHeadersLoading]);
|
513
|
-
|
514
|
-
/**
|
515
|
-
* Resets the workspace tab based on the provided log's state.
|
516
|
-
*
|
517
|
-
* Determines whether the workspace tab should display samples or info,
|
518
|
-
* depending on the presence of samples and the log status.
|
519
|
-
*/
|
520
|
-
const resetWorkspace = useCallback(
|
521
|
-
(log: EvalSummary) => {
|
522
|
-
// Reset the workspace tab
|
523
|
-
const hasSamples =
|
524
|
-
!!log.sampleSummaries && log.sampleSummaries.length > 0;
|
525
|
-
const showSamples = hasSamples;
|
526
|
-
setSelectedWorkspaceTab(
|
527
|
-
log.status !== "error" && hasSamples
|
528
|
-
? kEvalWorkspaceTabId
|
529
|
-
: kInfoWorkspaceTabId,
|
530
|
-
);
|
531
|
-
|
532
|
-
// Select the default scorer to use
|
533
|
-
const scorer = defaultScorer(log);
|
534
|
-
const scorers = defaultScorers(log);
|
535
|
-
|
536
|
-
// Reset state
|
537
|
-
setScores(scorers);
|
538
|
-
setScore(scorer);
|
539
|
-
|
540
|
-
setEpoch("all");
|
541
|
-
setFilter({});
|
542
|
-
setSort(kDefaultSort);
|
543
|
-
|
544
|
-
// Reset the sample tab
|
545
|
-
setSelectedSampleTab(undefined);
|
546
|
-
setSelectedSample(undefined);
|
547
|
-
if (showSamples) {
|
548
|
-
setSelectedSampleIndex(0);
|
549
|
-
} else {
|
550
|
-
setSelectedSampleIndex(-1);
|
551
|
-
}
|
552
|
-
|
553
|
-
workspaceTabScrollPosition.current = {};
|
554
|
-
},
|
555
|
-
[setSelectedWorkspaceTab],
|
556
|
-
);
|
557
|
-
|
558
82
|
// Load a specific log
|
559
83
|
useEffect(() => {
|
560
84
|
const loadSpecificLog = async () => {
|
561
|
-
|
562
|
-
if (targetLog && (!selectedLog || selectedLog.name !== targetLog.name)) {
|
85
|
+
if (selectedLogFile && selectedLogFile !== loadedLogFile) {
|
563
86
|
try {
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
// Reset the workspace tab
|
574
|
-
resetWorkspace(log);
|
575
|
-
|
576
|
-
setStatus({ loading: false, error: undefined });
|
577
|
-
}
|
87
|
+
// Set loading first and wait for it to update
|
88
|
+
setAppStatus({ loading: true, error: undefined });
|
89
|
+
|
90
|
+
// Then load the log
|
91
|
+
await loadLog(selectedLogFile);
|
92
|
+
selectSample(0);
|
93
|
+
|
94
|
+
// Finally set loading to false
|
95
|
+
setAppStatus({ loading: false, error: undefined });
|
578
96
|
} catch (e) {
|
579
97
|
console.log(e);
|
580
|
-
|
98
|
+
setAppStatus({ loading: false, error: e as Error });
|
581
99
|
}
|
582
|
-
} else if (logs.log_dir && logs.files.length === 0) {
|
583
|
-
setStatus({
|
584
|
-
loading: false,
|
585
|
-
error: new Error(
|
586
|
-
`No log files to display in the directory ${logs.log_dir}. Are you sure this is the correct log directory?`,
|
587
|
-
),
|
588
|
-
});
|
589
100
|
}
|
590
101
|
};
|
591
102
|
|
592
103
|
loadSpecificLog();
|
593
|
-
}, [
|
104
|
+
}, [selectedLogFile, loadedLogFile, loadLog, setAppStatus]);
|
594
105
|
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
console.log(e);
|
604
|
-
setStatus({ loading: false, error: e as Error });
|
605
|
-
return { log_dir: "", files: [] };
|
106
|
+
useEffect(() => {
|
107
|
+
// If the component re-mounts and there is a running load loaded
|
108
|
+
// start up polling
|
109
|
+
const doPoll = async () => {
|
110
|
+
await pollLog();
|
111
|
+
};
|
112
|
+
if (selectedLogSummary?.status === "started") {
|
113
|
+
doPoll();
|
606
114
|
}
|
607
|
-
};
|
115
|
+
}, []);
|
608
116
|
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
setStatus({ loading: false, error: e as Error });
|
117
|
+
useEffect(() => {
|
118
|
+
if (logs.log_dir && logs.files.length === 0) {
|
119
|
+
setAppStatus({
|
120
|
+
loading: false,
|
121
|
+
error: new Error(
|
122
|
+
`No log files to display in the directory ${logs.log_dir}. Are you sure this is the correct log directory?`,
|
123
|
+
),
|
124
|
+
});
|
618
125
|
}
|
619
|
-
};
|
126
|
+
}, [logs.log_dir, logs.files.length]);
|
620
127
|
|
621
|
-
const
|
128
|
+
const appRefreshLog = useCallback(() => {
|
622
129
|
try {
|
623
|
-
|
624
|
-
const targetLog = logs.files[selectedLogIndex];
|
625
|
-
const logContents = await loadLog(targetLog.name);
|
626
|
-
if (logContents) {
|
627
|
-
const log = logContents;
|
628
|
-
if (log.status !== "started") {
|
629
|
-
setLogHeaders((prev) => {
|
630
|
-
const updatedState = { ...prev };
|
631
|
-
const freshHeaders: EvalLogHeader = {
|
632
|
-
eval: log.eval,
|
633
|
-
plan: log.plan,
|
634
|
-
results: log.results !== null ? log.results : undefined,
|
635
|
-
stats: log.stats,
|
636
|
-
status: log.status,
|
637
|
-
version: log.version,
|
638
|
-
};
|
639
|
-
updatedState[targetLog.name] = freshHeaders;
|
640
|
-
return updatedState;
|
641
|
-
});
|
642
|
-
}
|
643
|
-
|
644
|
-
setSelectedLog({
|
645
|
-
contents: log,
|
646
|
-
name: targetLog.name,
|
647
|
-
});
|
130
|
+
setAppStatus({ loading: true, error: undefined });
|
648
131
|
|
649
|
-
|
650
|
-
|
132
|
+
refreshLog();
|
133
|
+
resetFiltering();
|
651
134
|
|
652
|
-
|
653
|
-
}
|
135
|
+
setAppStatus({ loading: false, error: undefined });
|
654
136
|
} catch (e) {
|
655
137
|
// Show an error
|
656
138
|
console.log(e);
|
657
|
-
|
139
|
+
setAppStatus({ loading: false, error: e as Error });
|
658
140
|
}
|
659
|
-
}, [
|
660
|
-
|
661
|
-
const showLogFile = useCallback(
|
662
|
-
async (logUrl: string) => {
|
663
|
-
const index = logs.files.findIndex((val) => {
|
664
|
-
return logUrl.endsWith(val.name);
|
665
|
-
});
|
666
|
-
if (index > -1) {
|
667
|
-
setSelectedLogIndex(index);
|
668
|
-
} else {
|
669
|
-
const result = await loadLogs();
|
670
|
-
const idx = result?.files.findIndex((file) => {
|
671
|
-
return logUrl.endsWith(file.name);
|
672
|
-
});
|
673
|
-
setLogs(result || { log_dir: "", files: [] });
|
674
|
-
setSelectedLogIndex(idx && idx > -1 ? idx : 0);
|
675
|
-
}
|
676
|
-
},
|
677
|
-
[logs, setSelectedLogIndex, setLogs],
|
678
|
-
);
|
679
|
-
|
680
|
-
const refreshLogList = useCallback(async () => {
|
681
|
-
const currentLog = logs.files[selectedLogIndex > -1 ? selectedLogIndex : 0];
|
682
|
-
const refreshedLogs = await loadLogs();
|
683
|
-
setLogs(refreshedLogs || { log_dir: "", files: [] });
|
684
|
-
|
685
|
-
const newIndex = refreshedLogs?.files.findIndex((file) => {
|
686
|
-
return currentLog.name.endsWith(file.name);
|
687
|
-
});
|
688
|
-
if (newIndex !== undefined) {
|
689
|
-
setSelectedLogIndex(newIndex);
|
690
|
-
}
|
691
|
-
}, [logs, selectedLogIndex, setSelectedLogIndex, setLogs]);
|
141
|
+
}, [refreshLog, resetFiltering, setAppStatus]);
|
692
142
|
|
693
143
|
const onMessage = useCallback(
|
694
144
|
async (e: HostMessage) => {
|
@@ -696,7 +146,7 @@ export const App: FC<AppProps> = ({
|
|
696
146
|
case "updateState": {
|
697
147
|
if (e.data.url) {
|
698
148
|
const decodedUrl = decodeURIComponent(e.data.url);
|
699
|
-
|
149
|
+
selectLogFile(decodedUrl);
|
700
150
|
}
|
701
151
|
break;
|
702
152
|
}
|
@@ -706,18 +156,18 @@ export const App: FC<AppProps> = ({
|
|
706
156
|
const isFocused = document.hasFocus();
|
707
157
|
if (!isFocused) {
|
708
158
|
if (log_dir === logs.log_dir) {
|
709
|
-
|
159
|
+
selectLogFile(decodedUrl);
|
710
160
|
} else {
|
711
161
|
api.open_log_file(e.data.url, e.data.log_dir);
|
712
162
|
}
|
713
163
|
} else {
|
714
|
-
|
164
|
+
refreshLogs();
|
715
165
|
}
|
716
166
|
break;
|
717
167
|
}
|
718
168
|
}
|
719
169
|
},
|
720
|
-
[logs,
|
170
|
+
[logs, selectLogFile, refreshLogs],
|
721
171
|
);
|
722
172
|
|
723
173
|
// listen for updateState messages from vscode
|
@@ -730,123 +180,95 @@ export const App: FC<AppProps> = ({
|
|
730
180
|
|
731
181
|
useEffect(() => {
|
732
182
|
const loadLogsAndState = async () => {
|
733
|
-
//
|
734
|
-
const urlParams = new URLSearchParams(window.location.search);
|
735
|
-
|
736
|
-
// If the URL provides a task file, load that
|
737
|
-
const logPath = urlParams.get("task_file");
|
738
|
-
|
739
|
-
// Replace spaces with a '+' sign:
|
740
|
-
const resolvedLogPath = logPath ? logPath.replace(" ", "+") : logPath;
|
741
|
-
const load = resolvedLogPath
|
742
|
-
? async (): Promise<LogFiles> => {
|
743
|
-
return {
|
744
|
-
log_dir: "",
|
745
|
-
files: [{ name: resolvedLogPath }],
|
746
|
-
};
|
747
|
-
}
|
748
|
-
: loadLogs;
|
749
|
-
|
183
|
+
// First see if there is embedded state and if so, use that
|
750
184
|
const embeddedState = document.getElementById("logview-state");
|
751
185
|
if (embeddedState) {
|
752
186
|
const state = JSON5.parse(embeddedState.textContent || "");
|
753
187
|
onMessage({ data: state });
|
754
188
|
} else {
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
// If a
|
759
|
-
const
|
760
|
-
|
761
|
-
|
762
|
-
|
189
|
+
// See whether a specific task_file has been passed.
|
190
|
+
const urlParams = new URLSearchParams(window.location.search);
|
191
|
+
|
192
|
+
// If the URL provides a task file, load that
|
193
|
+
const logPath = urlParams.get("task_file");
|
194
|
+
|
195
|
+
// Replace spaces with a '+' sign:
|
196
|
+
const resolvedLogPath = logPath ? logPath.replace(" ", "+") : logPath;
|
197
|
+
|
198
|
+
if (resolvedLogPath) {
|
199
|
+
// Load only this file
|
200
|
+
setLogs({
|
201
|
+
log_dir: "",
|
202
|
+
files: [{ name: resolvedLogPath }],
|
763
203
|
});
|
764
|
-
|
765
|
-
|
204
|
+
} else {
|
205
|
+
// If a log file was passed, select it
|
206
|
+
const log_file = urlParams.get("log_file");
|
207
|
+
if (log_file) {
|
208
|
+
await selectLogFile(log_file);
|
209
|
+
} else {
|
210
|
+
// Load all logs
|
211
|
+
await refreshLogs();
|
766
212
|
}
|
767
|
-
} else if (selectedLogIndex === -1) {
|
768
|
-
setSelectedLogIndex(0);
|
769
213
|
}
|
770
214
|
}
|
771
215
|
|
772
216
|
new ClipboardJS(".clipboard-button,.copy-button");
|
773
|
-
|
774
|
-
if (pollForLogs) {
|
775
|
-
let retryDelay = 1000;
|
776
|
-
const maxRetryDelay = 60000;
|
777
|
-
|
778
|
-
const pollEvents = async () => {
|
779
|
-
try {
|
780
|
-
const events = await api.client_events();
|
781
|
-
|
782
|
-
if (events.includes("reload")) {
|
783
|
-
window.location.reload();
|
784
|
-
}
|
785
|
-
|
786
|
-
if (events.includes("refresh-evals")) {
|
787
|
-
const logs = await load();
|
788
|
-
setLogs(logs);
|
789
|
-
setSelectedLogIndex(0);
|
790
|
-
}
|
791
|
-
|
792
|
-
// Reset delay after a successful call
|
793
|
-
retryDelay = 1000;
|
794
|
-
} catch (error) {
|
795
|
-
console.error("Error fetching client events:", error);
|
796
|
-
|
797
|
-
// Exponential backoff with capping
|
798
|
-
retryDelay = Math.min(retryDelay * 2, maxRetryDelay);
|
799
|
-
} finally {
|
800
|
-
// Schedule the next poll
|
801
|
-
setTimeout(pollEvents, retryDelay);
|
802
|
-
}
|
803
|
-
};
|
804
|
-
|
805
|
-
// Start polling
|
806
|
-
pollEvents();
|
807
|
-
}
|
808
217
|
};
|
809
218
|
|
810
219
|
loadLogsAndState();
|
811
|
-
}, []);
|
220
|
+
}, [setLogs, selectLogFile, refreshLogs]);
|
812
221
|
|
813
222
|
// Configure an app envelope specific to the current state
|
814
223
|
// if there are no log files, then don't show sidebar
|
815
224
|
const fullScreen = logs.files.length === 1 && !logs.log_dir;
|
816
225
|
|
817
|
-
const hideFind = useCallback(() => {
|
818
|
-
clearDocumentSelection();
|
819
|
-
if (showFind) {
|
820
|
-
setShowFind(false);
|
821
|
-
}
|
822
|
-
}, [showFind, setShowFind]);
|
823
|
-
|
824
226
|
const showToggle = logs.files.length > 1 || !!logs.log_dir || false;
|
825
227
|
|
826
|
-
|
827
|
-
|
828
|
-
|
829
|
-
|
830
|
-
|
831
|
-
|
832
|
-
|
833
|
-
|
834
|
-
|
835
|
-
|
228
|
+
const handleSelectedIndexChanged = useCallback(
|
229
|
+
(index: number) => {
|
230
|
+
setSelectedLogIndex(index);
|
231
|
+
setOffCanvas(false);
|
232
|
+
resetFiltering();
|
233
|
+
clearSampleTab();
|
234
|
+
clearWorkspaceTab();
|
235
|
+
selectSample(0);
|
236
|
+
},
|
237
|
+
[
|
238
|
+
setSelectedLogIndex,
|
239
|
+
setOffCanvas,
|
240
|
+
resetFiltering,
|
241
|
+
clearSampleTab,
|
242
|
+
clearWorkspaceTab,
|
243
|
+
selectSample,
|
244
|
+
],
|
245
|
+
);
|
246
|
+
|
247
|
+
const handleKeyboard = useCallback(
|
248
|
+
(e: KeyboardEvent) => {
|
249
|
+
// Add keyboard shortcuts for find, if needed
|
250
|
+
if (nativeFind || !setShowFind) {
|
251
|
+
return;
|
252
|
+
}
|
253
|
+
|
254
|
+
if ((e.ctrlKey || e.metaKey) && e.key === "f") {
|
255
|
+
setShowFind(true);
|
256
|
+
} else if (e.key === "Escape") {
|
257
|
+
hideFind();
|
258
|
+
}
|
259
|
+
},
|
260
|
+
[nativeFind, setShowFind, hideFind],
|
261
|
+
);
|
262
|
+
|
836
263
|
return (
|
837
|
-
|
838
|
-
{!fullScreen &&
|
264
|
+
<>
|
265
|
+
{!fullScreen && selectedLogSummary ? (
|
839
266
|
<Sidebar
|
840
267
|
logs={logs}
|
841
268
|
logHeaders={logHeaders}
|
842
269
|
loading={headersLoading}
|
843
|
-
offcanvas={offcanvas}
|
844
|
-
setOffcanvas={setOffcanvas}
|
845
270
|
selectedIndex={selectedLogIndex}
|
846
|
-
onSelectedIndexChanged={
|
847
|
-
setSelectedLogIndex(index);
|
848
|
-
setOffcanvas(false);
|
849
|
-
}}
|
271
|
+
onSelectedIndexChanged={handleSelectedIndexChanged}
|
850
272
|
/>
|
851
273
|
) : undefined}
|
852
274
|
<div
|
@@ -854,79 +276,35 @@ export const App: FC<AppProps> = ({
|
|
854
276
|
className={clsx(
|
855
277
|
"app-main-grid",
|
856
278
|
fullScreen ? "full-screen" : undefined,
|
857
|
-
|
279
|
+
offCanvas ? "off-canvas" : undefined,
|
858
280
|
)}
|
859
281
|
tabIndex={0}
|
860
|
-
onKeyDown={
|
861
|
-
// regular browsers user their own find
|
862
|
-
if (!getVscodeApi()) {
|
863
|
-
return;
|
864
|
-
}
|
865
|
-
|
866
|
-
if ((e.ctrlKey || e.metaKey) && e.key === "f") {
|
867
|
-
setShowFind(true);
|
868
|
-
} else if (e.key === "Escape") {
|
869
|
-
hideFind();
|
870
|
-
}
|
871
|
-
}}
|
282
|
+
onKeyDown={handleKeyboard}
|
872
283
|
>
|
873
|
-
{showFind ? <FindBand
|
874
|
-
<ProgressBar animating={
|
875
|
-
{
|
284
|
+
{!nativeFind && showFind ? <FindBand /> : ""}
|
285
|
+
<ProgressBar animating={appStatus.loading} />
|
286
|
+
{appStatus.error ? (
|
876
287
|
<ErrorPanel
|
877
288
|
title="An error occurred while loading this task."
|
878
|
-
error={
|
289
|
+
error={appStatus.error}
|
879
290
|
/>
|
880
291
|
) : (
|
881
292
|
<WorkSpace
|
882
|
-
task_id={
|
883
|
-
|
884
|
-
|
885
|
-
|
886
|
-
|
887
|
-
|
888
|
-
|
889
|
-
|
890
|
-
|
293
|
+
task_id={selectedLogSummary?.eval?.task_id}
|
294
|
+
evalStatus={selectedLogSummary?.status}
|
295
|
+
evalError={filterNull(selectedLogSummary?.error)}
|
296
|
+
evalVersion={selectedLogSummary?.version}
|
297
|
+
evalSpec={selectedLogSummary?.eval}
|
298
|
+
evalPlan={selectedLogSummary?.plan}
|
299
|
+
evalStats={selectedLogSummary?.stats}
|
300
|
+
evalResults={filterNull(selectedLogSummary?.results)}
|
301
|
+
runningMetrics={runningMetrics}
|
891
302
|
showToggle={showToggle}
|
892
|
-
|
893
|
-
sampleMode={sampleMode}
|
894
|
-
groupBy={groupBy}
|
895
|
-
groupByOrder={groupByOrder}
|
896
|
-
sampleStatus={sampleStatus}
|
897
|
-
sampleError={sampleError}
|
898
|
-
samplesDescriptor={samplesDescriptor}
|
899
|
-
refreshLog={refreshLog}
|
900
|
-
offcanvas={offcanvas}
|
901
|
-
setOffcanvas={setOffcanvas}
|
902
|
-
capabilities={capabilities}
|
903
|
-
selectedSample={selectedSample}
|
904
|
-
selectedSampleIndex={selectedSampleIndex}
|
905
|
-
setSelectedSampleIndex={setSelectedSampleIndex}
|
906
|
-
showingSampleDialog={showingSampleDialog}
|
907
|
-
setShowingSampleDialog={handleSampleShowingDialog}
|
908
|
-
selectedTab={selectedWorkspaceTab}
|
909
|
-
setSelectedTab={setSelectedWorkspaceTab}
|
910
|
-
selectedSampleTab={selectedSampleTab}
|
911
|
-
setSelectedSampleTab={setSelectedSampleTab}
|
912
|
-
sort={sort}
|
913
|
-
setSort={setSort}
|
914
|
-
epochs={selectedLog?.contents?.eval?.config?.epochs}
|
915
|
-
epoch={epoch}
|
916
|
-
setEpoch={setEpoch}
|
917
|
-
filter={filter}
|
918
|
-
setFilter={setFilter}
|
919
|
-
score={score}
|
920
|
-
setScore={setScore}
|
921
|
-
scores={scores}
|
922
|
-
sampleScrollPositionRef={sampleScrollPosition}
|
923
|
-
setSampleScrollPosition={setSampleScrollPosition}
|
924
|
-
workspaceTabScrollPositionRef={workspaceTabScrollPosition}
|
925
|
-
setWorkspaceTabScrollPosition={setWorkspaceTabScrollPosition}
|
303
|
+
refreshLog={appRefreshLog}
|
926
304
|
/>
|
927
305
|
)}
|
928
306
|
</div>
|
929
|
-
|
307
|
+
</>
|
930
308
|
);
|
931
309
|
};
|
932
310
|
|
@@ -936,73 +314,3 @@ const filterNull = <T,>(obj: T | null): T | undefined => {
|
|
936
314
|
}
|
937
315
|
return obj;
|
938
316
|
};
|
939
|
-
|
940
|
-
interface ScorerInfo {
|
941
|
-
name: string;
|
942
|
-
scorer: string;
|
943
|
-
}
|
944
|
-
|
945
|
-
/**
|
946
|
-
* Determines the default scorer for a log
|
947
|
-
*/
|
948
|
-
const defaultScorer = (log: EvalSummary): ScorerInfo | undefined => {
|
949
|
-
if (log.sampleSummaries.length === 0) {
|
950
|
-
return undefined;
|
951
|
-
}
|
952
|
-
|
953
|
-
// Select the default scorer to use
|
954
|
-
const scores = log.sampleSummaries[0].scores;
|
955
|
-
|
956
|
-
const scorer = log.results?.scores[0]
|
957
|
-
? {
|
958
|
-
name: log.results?.scores[0].name,
|
959
|
-
scorer: log.results?.scores[0].scorer,
|
960
|
-
}
|
961
|
-
: log.sampleSummaries.length > 0 && scores !== null
|
962
|
-
? {
|
963
|
-
name: Object.keys(scores)[0],
|
964
|
-
scorer: Object.keys(scores)[0],
|
965
|
-
}
|
966
|
-
: undefined;
|
967
|
-
return scorer;
|
968
|
-
};
|
969
|
-
|
970
|
-
/**
|
971
|
-
* Determines the default scorers for a log
|
972
|
-
*/
|
973
|
-
const defaultScorers = (log: EvalSummary): Array<ScorerInfo> => {
|
974
|
-
if (log.results?.scores) {
|
975
|
-
return (log.results?.scores || [])
|
976
|
-
.map((score): ScorerInfo => {
|
977
|
-
return {
|
978
|
-
name: score.name,
|
979
|
-
scorer: score.scorer,
|
980
|
-
};
|
981
|
-
})
|
982
|
-
.reduce((accum, scorer) => {
|
983
|
-
if (
|
984
|
-
!accum.find((sc) => {
|
985
|
-
return scorer.scorer === sc.scorer && scorer.name === sc.name;
|
986
|
-
})
|
987
|
-
) {
|
988
|
-
accum.push(scorer);
|
989
|
-
}
|
990
|
-
return accum;
|
991
|
-
}, [] as Array<ScorerInfo>);
|
992
|
-
} else if (log.sampleSummaries && log.sampleSummaries.length > 0) {
|
993
|
-
const scores = log.sampleSummaries[0].scores;
|
994
|
-
|
995
|
-
if (scores !== null) {
|
996
|
-
return Object.keys(scores).map((key) => {
|
997
|
-
return {
|
998
|
-
name: key,
|
999
|
-
scorer: key,
|
1000
|
-
};
|
1001
|
-
});
|
1002
|
-
} else {
|
1003
|
-
return [];
|
1004
|
-
}
|
1005
|
-
} else {
|
1006
|
-
return [];
|
1007
|
-
}
|
1008
|
-
};
|