inspect-ai 0.3.81__py3-none-any.whl → 0.3.83__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/__init__.py +2 -1
- inspect_ai/_cli/eval.py +35 -2
- inspect_ai/_cli/util.py +44 -1
- inspect_ai/_display/core/config.py +1 -1
- inspect_ai/_display/core/display.py +13 -4
- inspect_ai/_display/core/results.py +1 -1
- inspect_ai/_display/textual/app.py +14 -3
- inspect_ai/_display/textual/display.py +4 -0
- inspect_ai/_display/textual/widgets/samples.py +9 -3
- inspect_ai/_display/textual/widgets/task_detail.py +8 -8
- inspect_ai/_display/textual/widgets/tasks.py +17 -1
- inspect_ai/_display/textual/widgets/vscode.py +44 -0
- inspect_ai/_eval/eval.py +74 -25
- inspect_ai/_eval/evalset.py +22 -18
- inspect_ai/_eval/loader.py +34 -11
- inspect_ai/_eval/run.py +13 -15
- inspect_ai/_eval/score.py +13 -3
- inspect_ai/_eval/task/generate.py +8 -9
- inspect_ai/_eval/task/log.py +55 -6
- inspect_ai/_eval/task/run.py +51 -10
- inspect_ai/_eval/task/task.py +23 -9
- inspect_ai/_util/constants.py +2 -0
- inspect_ai/_util/file.py +30 -1
- inspect_ai/_util/json.py +37 -1
- inspect_ai/_util/registry.py +1 -0
- inspect_ai/_util/vscode.py +37 -0
- inspect_ai/_view/server.py +113 -1
- inspect_ai/_view/www/App.css +7 -1
- inspect_ai/_view/www/dist/assets/index.css +813 -415
- inspect_ai/_view/www/dist/assets/index.js +54475 -32003
- inspect_ai/_view/www/eslint.config.mjs +1 -1
- inspect_ai/_view/www/log-schema.json +137 -31
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
- inspect_ai/_view/www/package.json +11 -2
- inspect_ai/_view/www/src/App.tsx +161 -853
- inspect_ai/_view/www/src/api/api-browser.ts +176 -5
- inspect_ai/_view/www/src/api/api-vscode.ts +75 -1
- inspect_ai/_view/www/src/api/client-api.ts +66 -10
- inspect_ai/_view/www/src/api/jsonrpc.ts +2 -0
- inspect_ai/_view/www/src/api/types.ts +107 -2
- inspect_ai/_view/www/src/appearance/icons.ts +2 -0
- inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +3 -3
- inspect_ai/_view/www/src/components/Card.tsx +6 -4
- inspect_ai/_view/www/src/components/DownloadPanel.tsx +2 -2
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +56 -61
- inspect_ai/_view/www/src/components/FindBand.tsx +17 -9
- inspect_ai/_view/www/src/components/HumanBaselineView.tsx +1 -1
- inspect_ai/_view/www/src/components/JsonPanel.tsx +14 -24
- inspect_ai/_view/www/src/components/LargeModal.tsx +2 -35
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +27 -11
- inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
- inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
- inspect_ai/_view/www/src/components/LiveVirtualList.module.css +11 -0
- inspect_ai/_view/www/src/components/LiveVirtualList.tsx +177 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +116 -26
- inspect_ai/_view/www/src/components/MessageBand.tsx +14 -9
- inspect_ai/_view/www/src/components/Modal.module.css +38 -0
- inspect_ai/_view/www/src/components/Modal.tsx +77 -0
- inspect_ai/_view/www/src/components/MorePopOver.tsx +3 -3
- inspect_ai/_view/www/src/components/NavPills.tsx +20 -8
- inspect_ai/_view/www/src/components/NoContentsPanel.module.css +12 -0
- inspect_ai/_view/www/src/components/NoContentsPanel.tsx +20 -0
- inspect_ai/_view/www/src/components/ProgressBar.module.css +5 -4
- inspect_ai/_view/www/src/components/ProgressBar.tsx +3 -2
- inspect_ai/_view/www/src/components/PulsingDots.module.css +81 -0
- inspect_ai/_view/www/src/components/PulsingDots.tsx +45 -0
- inspect_ai/_view/www/src/components/TabSet.tsx +4 -37
- inspect_ai/_view/www/src/components/ToolButton.tsx +3 -4
- inspect_ai/_view/www/src/index.tsx +26 -94
- inspect_ai/_view/www/src/logfile/remoteLogFile.ts +9 -1
- inspect_ai/_view/www/src/logfile/remoteZipFile.ts +30 -4
- inspect_ai/_view/www/src/metadata/RenderedContent.tsx +4 -6
- inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
- inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
- inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +1 -1
- inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +9 -1
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +74 -28
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +58 -22
- inspect_ai/_view/www/src/samples/SampleDisplay.module.css +4 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +135 -104
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +10 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +83 -36
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +35 -30
- inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +1 -1
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +45 -53
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +6 -1
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +5 -0
- inspect_ai/_view/www/src/samples/chat/messages.ts +36 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +3 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +11 -1
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +22 -46
- inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +34 -20
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -10
- inspect_ai/_view/www/src/samples/descriptor/types.ts +6 -5
- inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +22 -3
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +27 -2
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +122 -85
- inspect_ai/_view/www/src/samples/list/SampleRow.module.css +6 -0
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +28 -15
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +29 -18
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +28 -28
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +19 -9
- inspect_ai/_view/www/src/samples/sampleDataAdapter.ts +33 -0
- inspect_ai/_view/www/src/samples/sampleLimit.ts +2 -2
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +12 -27
- inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
- inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
- inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
- inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +4 -0
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +10 -24
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -22
- inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +15 -24
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +6 -28
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +24 -34
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +4 -0
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +33 -17
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +197 -338
- inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.module.css +16 -0
- inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.tsx +44 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +7 -4
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +81 -60
- inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.module.css +23 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.tsx +27 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +29 -1
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +102 -72
- inspect_ai/_view/www/src/scoring/utils.ts +87 -0
- inspect_ai/_view/www/src/state/appSlice.ts +244 -0
- inspect_ai/_view/www/src/state/hooks.ts +399 -0
- inspect_ai/_view/www/src/state/logPolling.ts +200 -0
- inspect_ai/_view/www/src/state/logSlice.ts +224 -0
- inspect_ai/_view/www/src/state/logsPolling.ts +118 -0
- inspect_ai/_view/www/src/state/logsSlice.ts +181 -0
- inspect_ai/_view/www/src/state/samplePolling.ts +314 -0
- inspect_ai/_view/www/src/state/sampleSlice.ts +140 -0
- inspect_ai/_view/www/src/state/sampleUtils.ts +21 -0
- inspect_ai/_view/www/src/state/scrolling.ts +206 -0
- inspect_ai/_view/www/src/state/store.ts +168 -0
- inspect_ai/_view/www/src/state/store_filter.ts +84 -0
- inspect_ai/_view/www/src/state/utils.ts +23 -0
- inspect_ai/_view/www/src/storage/index.ts +26 -0
- inspect_ai/_view/www/src/types/log.d.ts +36 -26
- inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
- inspect_ai/_view/www/src/types.ts +94 -32
- inspect_ai/_view/www/src/utils/attachments.ts +58 -23
- inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
- inspect_ai/_view/www/src/utils/logger.ts +52 -0
- inspect_ai/_view/www/src/utils/polling.ts +100 -0
- inspect_ai/_view/www/src/utils/react.ts +30 -0
- inspect_ai/_view/www/src/utils/vscode.ts +1 -1
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +184 -217
- inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +11 -53
- inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +8 -18
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -0
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +40 -22
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -1
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +159 -103
- inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.module.css +32 -0
- inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.tsx +32 -0
- inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
- inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +12 -14
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +6 -2
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +4 -4
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +28 -13
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +5 -10
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +4 -4
- inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.module.css +22 -0
- inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.tsx +19 -0
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +128 -115
- inspect_ai/_view/www/src/workspace/tabs/grouping.ts +37 -5
- inspect_ai/_view/www/src/workspace/tabs/types.ts +4 -0
- inspect_ai/_view/www/src/workspace/types.ts +4 -3
- inspect_ai/_view/www/src/workspace/utils.ts +4 -4
- inspect_ai/_view/www/vite.config.js +6 -0
- inspect_ai/_view/www/yarn.lock +464 -355
- inspect_ai/agent/__init__.py +36 -0
- inspect_ai/agent/_agent.py +268 -0
- inspect_ai/agent/_as_solver.py +72 -0
- inspect_ai/agent/_as_tool.py +122 -0
- inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
- inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
- inspect_ai/agent/_filter.py +46 -0
- inspect_ai/agent/_handoff.py +93 -0
- inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
- inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
- inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
- inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
- inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
- inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
- inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
- inspect_ai/agent/_react.py +241 -0
- inspect_ai/agent/_run.py +36 -0
- inspect_ai/agent/_types.py +81 -0
- inspect_ai/log/_condense.py +26 -0
- inspect_ai/log/_log.py +17 -5
- inspect_ai/log/_recorders/buffer/__init__.py +14 -0
- inspect_ai/log/_recorders/buffer/buffer.py +30 -0
- inspect_ai/log/_recorders/buffer/database.py +685 -0
- inspect_ai/log/_recorders/buffer/filestore.py +259 -0
- inspect_ai/log/_recorders/buffer/types.py +84 -0
- inspect_ai/log/_recorders/eval.py +2 -11
- inspect_ai/log/_recorders/types.py +30 -0
- inspect_ai/log/_transcript.py +32 -2
- inspect_ai/model/__init__.py +7 -1
- inspect_ai/model/_call_tools.py +257 -52
- inspect_ai/model/_chat_message.py +7 -4
- inspect_ai/model/_conversation.py +13 -62
- inspect_ai/model/_display.py +85 -0
- inspect_ai/model/_generate_config.py +2 -2
- inspect_ai/model/_model.py +114 -14
- inspect_ai/model/_model_output.py +14 -9
- inspect_ai/model/_openai.py +16 -4
- inspect_ai/model/_openai_computer_use.py +162 -0
- inspect_ai/model/_openai_responses.py +319 -165
- inspect_ai/model/_providers/anthropic.py +20 -21
- inspect_ai/model/_providers/azureai.py +24 -13
- inspect_ai/model/_providers/bedrock.py +1 -7
- inspect_ai/model/_providers/cloudflare.py +3 -3
- inspect_ai/model/_providers/goodfire.py +2 -6
- inspect_ai/model/_providers/google.py +11 -10
- inspect_ai/model/_providers/groq.py +6 -3
- inspect_ai/model/_providers/hf.py +7 -3
- inspect_ai/model/_providers/mistral.py +7 -10
- inspect_ai/model/_providers/openai.py +47 -17
- inspect_ai/model/_providers/openai_o1.py +11 -4
- inspect_ai/model/_providers/openai_responses.py +12 -14
- inspect_ai/model/_providers/providers.py +2 -2
- inspect_ai/model/_providers/together.py +12 -2
- inspect_ai/model/_providers/util/chatapi.py +7 -2
- inspect_ai/model/_providers/util/hf_handler.py +4 -2
- inspect_ai/model/_providers/util/llama31.py +4 -2
- inspect_ai/model/_providers/vertex.py +11 -9
- inspect_ai/model/_providers/vllm.py +4 -4
- inspect_ai/scorer/__init__.py +2 -0
- inspect_ai/scorer/_metrics/__init__.py +2 -0
- inspect_ai/scorer/_metrics/grouped.py +84 -0
- inspect_ai/scorer/_score.py +26 -6
- inspect_ai/solver/__init__.py +2 -2
- inspect_ai/solver/_basic_agent.py +22 -9
- inspect_ai/solver/_bridge.py +31 -0
- inspect_ai/solver/_chain.py +20 -12
- inspect_ai/solver/_fork.py +5 -1
- inspect_ai/solver/_human_agent.py +52 -0
- inspect_ai/solver/_prompt.py +3 -1
- inspect_ai/solver/_run.py +59 -0
- inspect_ai/solver/_solver.py +14 -4
- inspect_ai/solver/_task_state.py +5 -3
- inspect_ai/tool/_tool_call.py +15 -8
- inspect_ai/tool/_tool_def.py +17 -12
- inspect_ai/tool/_tool_support_helpers.py +4 -4
- inspect_ai/tool/_tool_with.py +14 -11
- inspect_ai/tool/_tools/_bash_session.py +11 -2
- inspect_ai/tool/_tools/_computer/_common.py +18 -2
- inspect_ai/tool/_tools/_computer/_computer.py +18 -2
- inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
- inspect_ai/tool/_tools/_think.py +1 -1
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +103 -62
- inspect_ai/util/__init__.py +2 -0
- inspect_ai/util/_anyio.py +27 -0
- inspect_ai/util/_sandbox/__init__.py +2 -1
- inspect_ai/util/_sandbox/context.py +32 -7
- inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
- inspect_ai/util/_sandbox/docker/compose.py +2 -2
- inspect_ai/util/_sandbox/docker/docker.py +12 -1
- inspect_ai/util/_store_model.py +30 -7
- inspect_ai/util/_subprocess.py +13 -3
- inspect_ai/util/_subtask.py +1 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/RECORD +295 -229
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -169
- inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +0 -22
- /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/top_level.txt +0 -0
@@ -1,46 +1,19 @@
|
|
1
1
|
import clsx from "clsx";
|
2
|
-
import {
|
3
|
-
import { FC, memo, useEffect, useRef } from "react";
|
2
|
+
import { FC } from "react";
|
4
3
|
import { MarkdownDiv } from "../../../components/MarkdownDiv";
|
5
4
|
|
5
|
+
import { usePrismHighlight } from "../../../state/hooks";
|
6
6
|
import styles from "./ToolInput.module.css";
|
7
7
|
|
8
|
-
export const useCodeHighlight = (language?: string) => {
|
9
|
-
const codeRef = useRef<HTMLElement>(null);
|
10
|
-
|
11
|
-
useEffect(() => {
|
12
|
-
if (codeRef.current && language) {
|
13
|
-
highlightElement(codeRef.current);
|
14
|
-
}
|
15
|
-
}, [language]);
|
16
|
-
|
17
|
-
return codeRef;
|
18
|
-
};
|
19
|
-
|
20
8
|
interface ToolInputProps {
|
21
9
|
highlightLanguage?: string;
|
22
10
|
contents?: string | object;
|
23
11
|
toolCallView?: { content: string };
|
24
12
|
}
|
25
|
-
export const ToolInput: FC<ToolInputProps> =
|
13
|
+
export const ToolInput: FC<ToolInputProps> = (props) => {
|
26
14
|
const { highlightLanguage, contents, toolCallView } = props;
|
27
15
|
|
28
|
-
const
|
29
|
-
const toolViewRef = useRef<HTMLDivElement>(null);
|
30
|
-
|
31
|
-
useEffect(() => {
|
32
|
-
if (toolCallView?.content && toolViewRef.current) {
|
33
|
-
requestAnimationFrame(() => {
|
34
|
-
const codeBlocks = toolViewRef.current!.querySelectorAll("pre code");
|
35
|
-
codeBlocks.forEach((block) => {
|
36
|
-
if (block.className.includes("language-")) {
|
37
|
-
block.classList.add("sourceCode");
|
38
|
-
highlightElement(block as HTMLElement);
|
39
|
-
}
|
40
|
-
});
|
41
|
-
});
|
42
|
-
}
|
43
|
-
}, [toolCallView?.content]);
|
16
|
+
const prismParentRef = usePrismHighlight(toolCallView?.content);
|
44
17
|
|
45
18
|
if (!contents && !toolCallView?.content) return null;
|
46
19
|
|
@@ -48,8 +21,8 @@ export const ToolInput: FC<ToolInputProps> = memo((props) => {
|
|
48
21
|
return (
|
49
22
|
<MarkdownDiv
|
50
23
|
markdown={toolCallView.content}
|
51
|
-
ref={
|
52
|
-
className={clsx("text-size-small", "tool-output")}
|
24
|
+
ref={prismParentRef}
|
25
|
+
className={clsx(styles.bottomPadding, "text-size-small", "tool-output")}
|
53
26
|
/>
|
54
27
|
);
|
55
28
|
}
|
@@ -58,18 +31,21 @@ export const ToolInput: FC<ToolInputProps> = memo((props) => {
|
|
58
31
|
typeof contents === "object" ? JSON.stringify(contents) : contents;
|
59
32
|
|
60
33
|
return (
|
61
|
-
<
|
62
|
-
<
|
63
|
-
|
64
|
-
className={clsx(
|
65
|
-
"source-code",
|
66
|
-
"sourceCode",
|
67
|
-
highlightLanguage ? `language-${highlightLanguage}` : undefined,
|
68
|
-
styles.outputCode,
|
69
|
-
)}
|
34
|
+
<div ref={prismParentRef}>
|
35
|
+
<pre
|
36
|
+
className={clsx("tool-output", styles.outputPre, styles.bottomMargin)}
|
70
37
|
>
|
71
|
-
|
72
|
-
|
73
|
-
|
38
|
+
<code
|
39
|
+
className={clsx(
|
40
|
+
"source-code",
|
41
|
+
"sourceCode",
|
42
|
+
highlightLanguage ? `language-${highlightLanguage}` : undefined,
|
43
|
+
styles.outputCode,
|
44
|
+
)}
|
45
|
+
>
|
46
|
+
{formattedContent}
|
47
|
+
</code>
|
48
|
+
</pre>
|
49
|
+
</div>
|
74
50
|
);
|
75
|
-
}
|
51
|
+
};
|
@@ -15,14 +15,14 @@ import {
|
|
15
15
|
export interface SamplesDescriptor {
|
16
16
|
evalDescriptor: EvalDescriptor;
|
17
17
|
messageShape: MessageShape;
|
18
|
-
selectedScoreDescriptor?: ScoreDescriptor;
|
19
18
|
selectedScore: (sample: BasicSampleData) => SelectedScore | undefined;
|
20
|
-
selectedScorerDescriptor: (
|
19
|
+
selectedScorerDescriptor: (
|
20
|
+
sample: BasicSampleData,
|
21
|
+
) => ScorerDescriptor | undefined;
|
21
22
|
}
|
22
23
|
|
23
24
|
export const createEvalDescriptor = (
|
24
25
|
scores: ScoreLabel[],
|
25
|
-
epochs: number,
|
26
26
|
samples?: SampleSummary[],
|
27
27
|
): EvalDescriptor | undefined => {
|
28
28
|
if (!samples) {
|
@@ -47,7 +47,13 @@ export const createEvalDescriptor = (
|
|
47
47
|
sample.scores[scoreLabel.scorer] &&
|
48
48
|
sample.scores[scoreLabel.scorer].value
|
49
49
|
) {
|
50
|
-
|
50
|
+
if (typeof sample.scores[scoreLabel.scorer].value === "object") {
|
51
|
+
return (
|
52
|
+
sample.scores[scoreLabel.scorer].value as Record<string, Value2>
|
53
|
+
)[scoreLabel.name];
|
54
|
+
} else {
|
55
|
+
return sample.scores[scoreLabel.scorer].value;
|
56
|
+
}
|
51
57
|
} else if (sample.scores[scoreLabel.name]) {
|
52
58
|
return sample.scores[scoreLabel.name].value;
|
53
59
|
} else {
|
@@ -57,10 +63,10 @@ export const createEvalDescriptor = (
|
|
57
63
|
|
58
64
|
const scoreAnswer = (
|
59
65
|
sample: BasicSampleData,
|
60
|
-
scorer:
|
66
|
+
scorer: ScoreLabel,
|
61
67
|
): string | undefined => {
|
62
68
|
if (sample && sample.scores) {
|
63
|
-
const sampleScore = sample.scores[scorer];
|
69
|
+
const sampleScore = sample.scores[scorer.name];
|
64
70
|
if (sampleScore && sampleScore.answer) {
|
65
71
|
return sampleScore.answer;
|
66
72
|
}
|
@@ -162,7 +168,7 @@ export const createEvalDescriptor = (
|
|
162
168
|
return "null";
|
163
169
|
} else if (score === undefined) {
|
164
170
|
return "";
|
165
|
-
} else if (
|
171
|
+
} else if (descriptor && descriptor.render) {
|
166
172
|
return descriptor.render(score);
|
167
173
|
} else {
|
168
174
|
return <span>{String(score)}</span>;
|
@@ -181,7 +187,7 @@ export const createEvalDescriptor = (
|
|
181
187
|
return scoreExplanation(sample, scoreLabel.scorer) || "";
|
182
188
|
},
|
183
189
|
answer: () => {
|
184
|
-
return scoreAnswer(sample, scoreLabel
|
190
|
+
return scoreAnswer(sample, scoreLabel) || "";
|
185
191
|
},
|
186
192
|
scores: () => {
|
187
193
|
if (!sample || !sample.scores) {
|
@@ -252,8 +258,11 @@ export const createEvalDescriptor = (
|
|
252
258
|
|
253
259
|
const score = (
|
254
260
|
sample: BasicSampleData,
|
255
|
-
scoreLabel
|
256
|
-
): SelectedScore => {
|
261
|
+
scoreLabel?: ScoreLabel,
|
262
|
+
): SelectedScore | undefined => {
|
263
|
+
if (!scoreLabel) {
|
264
|
+
return undefined;
|
265
|
+
}
|
257
266
|
return {
|
258
267
|
value: scoreValue(sample, scoreLabel),
|
259
268
|
render: () => {
|
@@ -263,8 +272,6 @@ export const createEvalDescriptor = (
|
|
263
272
|
};
|
264
273
|
|
265
274
|
return {
|
266
|
-
epochs,
|
267
|
-
samples,
|
268
275
|
scores,
|
269
276
|
scorerDescriptor,
|
270
277
|
scoreDescriptor,
|
@@ -274,14 +281,17 @@ export const createEvalDescriptor = (
|
|
274
281
|
};
|
275
282
|
|
276
283
|
export const createSamplesDescriptor = (
|
284
|
+
samples: SampleSummary[],
|
277
285
|
evalDescriptor: EvalDescriptor,
|
278
|
-
selectedScore
|
286
|
+
selectedScore?: ScoreLabel,
|
279
287
|
): SamplesDescriptor | undefined => {
|
280
288
|
// Find the total length of the value so we can compute an average
|
281
|
-
const sizes =
|
289
|
+
const sizes = samples.reduce(
|
282
290
|
(previous, current) => {
|
283
291
|
const text = inputString(current.input).join(" ");
|
284
|
-
const score =
|
292
|
+
const score = selectedScore
|
293
|
+
? evalDescriptor.score(current, selectedScore)
|
294
|
+
: undefined;
|
285
295
|
const scoreValue = score?.value;
|
286
296
|
const scoreText = scoreValue
|
287
297
|
? String(scoreValue)
|
@@ -296,7 +306,9 @@ export const createSamplesDescriptor = (
|
|
296
306
|
previous[2] = Math.min(
|
297
307
|
Math.max(
|
298
308
|
previous[2],
|
299
|
-
|
309
|
+
selectedScore
|
310
|
+
? evalDescriptor.scoreAnswer(current, selectedScore)?.length || 0
|
311
|
+
: 0,
|
300
312
|
),
|
301
313
|
300,
|
302
314
|
);
|
@@ -322,7 +334,7 @@ export const createSamplesDescriptor = (
|
|
322
334
|
answer: Math.min(sizes[2], 300),
|
323
335
|
limit: Math.min(sizes[3], 50),
|
324
336
|
id: Math.min(sizes[4], 10),
|
325
|
-
score: Math.min(sizes[
|
337
|
+
score: Math.min(sizes[5], 30),
|
326
338
|
};
|
327
339
|
const base =
|
328
340
|
maxSizes.input +
|
@@ -353,10 +365,12 @@ export const createSamplesDescriptor = (
|
|
353
365
|
return {
|
354
366
|
evalDescriptor,
|
355
367
|
messageShape,
|
356
|
-
|
357
|
-
|
368
|
+
selectedScore: (sample) =>
|
369
|
+
selectedScore ? evalDescriptor.score(sample, selectedScore) : undefined,
|
358
370
|
selectedScorerDescriptor: (sample) =>
|
359
|
-
|
371
|
+
selectedScore
|
372
|
+
? evalDescriptor.scorerDescriptor(sample, selectedScore)
|
373
|
+
: undefined,
|
360
374
|
};
|
361
375
|
};
|
362
376
|
|
@@ -1,11 +1,11 @@
|
|
1
1
|
.circle {
|
2
2
|
font-family: "Consola Regular";
|
3
|
-
width:
|
4
|
-
height:
|
3
|
+
width: 40px;
|
4
|
+
height: 30px;
|
5
5
|
display: inline-flex;
|
6
6
|
justify-content: center;
|
7
7
|
align-items: center;
|
8
|
-
border-radius:
|
8
|
+
border-radius: 15px;
|
9
9
|
padding-top: 1px;
|
10
10
|
}
|
11
11
|
|
@@ -35,7 +35,7 @@ export const objectScoreDescriptor = (values: Value2[]): ScoreDescriptor => {
|
|
35
35
|
|
36
36
|
const scores: JSX.Element[] = [];
|
37
37
|
const keys = Object.keys(score);
|
38
|
-
keys.forEach((key
|
38
|
+
keys.forEach((key) => {
|
39
39
|
if (typeof score !== "object" || Array.isArray(score)) {
|
40
40
|
throw new Error(
|
41
41
|
"Unexpected us of object score descriptor for non-score object",
|
@@ -50,22 +50,22 @@ export const objectScoreDescriptor = (values: Value2[]): ScoreDescriptor => {
|
|
50
50
|
: parseFloat(value === true ? "1" : value),
|
51
51
|
)
|
52
52
|
: String(value);
|
53
|
+
|
53
54
|
scores.push(
|
54
|
-
|
55
|
-
className={clsx(
|
56
|
-
styles.container,
|
57
|
-
index + 1 < keys.length ? styles.padded : undefined,
|
58
|
-
)}
|
59
|
-
>
|
55
|
+
<>
|
60
56
|
<div className={clsx(styles.key, "text-size-smaller")}>{key}</div>
|
61
|
-
<div className={clsx(styles.value, "text-size-
|
57
|
+
<div className={clsx(styles.value, "text-size-base")}>
|
62
58
|
{formattedValue}
|
63
59
|
</div>
|
64
|
-
|
60
|
+
</>,
|
65
61
|
);
|
66
62
|
});
|
67
63
|
|
68
|
-
return
|
64
|
+
return (
|
65
|
+
<div key={`score-value`} className={clsx(styles.container)}>
|
66
|
+
{scores}
|
67
|
+
</div>
|
68
|
+
);
|
69
69
|
},
|
70
70
|
};
|
71
71
|
};
|
@@ -1,11 +1,9 @@
|
|
1
1
|
import { ReactNode } from "react";
|
2
|
-
import { BasicSampleData
|
2
|
+
import { BasicSampleData } from "../../api/types";
|
3
3
|
import { ScoreLabel } from "../../types";
|
4
4
|
import { Value2 } from "../../types/log";
|
5
5
|
|
6
6
|
export interface EvalDescriptor {
|
7
|
-
epochs: number;
|
8
|
-
samples: SampleSummary[];
|
9
7
|
scores: ScoreLabel[];
|
10
8
|
scoreDescriptor: (scoreLabel: ScoreLabel) => ScoreDescriptor;
|
11
9
|
scorerDescriptor: (
|
@@ -14,9 +12,12 @@ export interface EvalDescriptor {
|
|
14
12
|
) => ScorerDescriptor;
|
15
13
|
score: (
|
16
14
|
sample: BasicSampleData,
|
17
|
-
scoreLabel
|
15
|
+
scoreLabel?: ScoreLabel,
|
18
16
|
) => SelectedScore | undefined;
|
19
|
-
scoreAnswer: (
|
17
|
+
scoreAnswer: (
|
18
|
+
sample: BasicSampleData,
|
19
|
+
scorer: ScoreLabel,
|
20
|
+
) => string | undefined;
|
20
21
|
}
|
21
22
|
|
22
23
|
export interface ScorerDescriptor {
|
@@ -2,8 +2,27 @@
|
|
2
2
|
border-top: solid var(--bs-light-border-subtle) 1px;
|
3
3
|
background: var(--bs-light-bg-subtle);
|
4
4
|
display: grid;
|
5
|
-
grid-template-columns: max-content;
|
6
|
-
justify-content:
|
7
|
-
|
5
|
+
grid-template-columns: max-content max-content;
|
6
|
+
justify-content: space-between;
|
7
|
+
|
8
8
|
padding: 0.2em 1em;
|
9
9
|
}
|
10
|
+
|
11
|
+
.spinnerContainer {
|
12
|
+
display: grid;
|
13
|
+
grid-template-columns: max-content max-content;
|
14
|
+
column-gap: 0.3em;
|
15
|
+
padding-top: 0.2em;
|
16
|
+
}
|
17
|
+
|
18
|
+
.spinner {
|
19
|
+
height: 11px;
|
20
|
+
width: 11px;
|
21
|
+
color: var(--bs-secondary);
|
22
|
+
border-width: 1px;
|
23
|
+
}
|
24
|
+
|
25
|
+
.label {
|
26
|
+
margin-left: 0.1em;
|
27
|
+
margin-top: -3px;
|
28
|
+
}
|
@@ -1,15 +1,40 @@
|
|
1
1
|
interface SampleFooterProps {
|
2
2
|
sampleCount: number;
|
3
|
+
totalSampleCount: number;
|
4
|
+
running: boolean;
|
3
5
|
}
|
4
6
|
|
5
7
|
import clsx from "clsx";
|
6
8
|
import { FC } from "react";
|
7
9
|
import styles from "./SampleFooter.module.css";
|
8
10
|
|
9
|
-
export const SampleFooter: FC<SampleFooterProps> = ({
|
11
|
+
export const SampleFooter: FC<SampleFooterProps> = ({
|
12
|
+
sampleCount,
|
13
|
+
totalSampleCount,
|
14
|
+
running,
|
15
|
+
}) => {
|
10
16
|
return (
|
11
17
|
<div className={clsx("text-size-smaller", styles.footer)}>
|
12
|
-
<div>
|
18
|
+
<div>
|
19
|
+
{running ? (
|
20
|
+
<div className={clsx(styles.spinnerContainer)}>
|
21
|
+
<div
|
22
|
+
className={clsx("spinner-border", styles.spinner)}
|
23
|
+
role="status"
|
24
|
+
>
|
25
|
+
<span className={clsx("visually-hidden")}>Running...</span>
|
26
|
+
</div>
|
27
|
+
<div className={clsx("text-style-secondary", styles.label)}>
|
28
|
+
running...
|
29
|
+
</div>
|
30
|
+
</div>
|
31
|
+
) : undefined}
|
32
|
+
</div>
|
33
|
+
<div>
|
34
|
+
{sampleCount < totalSampleCount
|
35
|
+
? `${sampleCount} / ${totalSampleCount} Samples`
|
36
|
+
: `${sampleCount} Samples`}
|
37
|
+
</div>
|
13
38
|
</div>
|
14
39
|
);
|
15
40
|
};
|