inspect-ai 0.3.82__py3-none-any.whl → 0.3.83__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/__init__.py +2 -1
- inspect_ai/_display/textual/app.py +14 -3
- inspect_ai/_display/textual/display.py +4 -0
- inspect_ai/_display/textual/widgets/samples.py +9 -3
- inspect_ai/_display/textual/widgets/task_detail.py +3 -4
- inspect_ai/_display/textual/widgets/tasks.py +17 -1
- inspect_ai/_display/textual/widgets/vscode.py +44 -0
- inspect_ai/_eval/eval.py +36 -24
- inspect_ai/_eval/evalset.py +17 -18
- inspect_ai/_eval/loader.py +34 -11
- inspect_ai/_eval/run.py +8 -13
- inspect_ai/_eval/score.py +13 -3
- inspect_ai/_eval/task/generate.py +8 -9
- inspect_ai/_eval/task/log.py +2 -0
- inspect_ai/_eval/task/task.py +23 -9
- inspect_ai/_util/file.py +13 -0
- inspect_ai/_util/json.py +2 -1
- inspect_ai/_util/registry.py +1 -0
- inspect_ai/_util/vscode.py +37 -0
- inspect_ai/_view/www/App.css +6 -0
- inspect_ai/_view/www/dist/assets/index.css +304 -128
- inspect_ai/_view/www/dist/assets/index.js +47495 -27519
- inspect_ai/_view/www/log-schema.json +124 -31
- inspect_ai/_view/www/package.json +3 -0
- inspect_ai/_view/www/src/App.tsx +12 -0
- inspect_ai/_view/www/src/appearance/icons.ts +1 -0
- inspect_ai/_view/www/src/components/Card.tsx +6 -4
- inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
- inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
- inspect_ai/_view/www/src/components/LiveVirtualList.tsx +1 -1
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +113 -23
- inspect_ai/_view/www/src/components/Modal.module.css +38 -0
- inspect_ai/_view/www/src/components/Modal.tsx +77 -0
- inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
- inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
- inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +7 -0
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +7 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +11 -34
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +6 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +2 -2
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +12 -0
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +2 -0
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +2 -0
- inspect_ai/_view/www/src/samples/chat/messages.ts +3 -1
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +1 -0
- inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +9 -3
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -11
- inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +2 -1
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +7 -1
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +25 -8
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +1 -1
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +11 -22
- inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
- inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
- inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
- inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +3 -3
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +25 -4
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +29 -2
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +0 -1
- inspect_ai/_view/www/src/state/hooks.ts +5 -3
- inspect_ai/_view/www/src/state/logPolling.ts +5 -1
- inspect_ai/_view/www/src/state/logSlice.ts +10 -0
- inspect_ai/_view/www/src/state/samplePolling.ts +4 -1
- inspect_ai/_view/www/src/state/sampleSlice.ts +13 -0
- inspect_ai/_view/www/src/types/log.d.ts +34 -26
- inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
- inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +18 -16
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -0
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +68 -71
- inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
- inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +1 -1
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +18 -0
- inspect_ai/_view/www/yarn.lock +94 -1
- inspect_ai/agent/__init__.py +36 -0
- inspect_ai/agent/_agent.py +268 -0
- inspect_ai/agent/_as_solver.py +72 -0
- inspect_ai/agent/_as_tool.py +122 -0
- inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
- inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
- inspect_ai/agent/_filter.py +46 -0
- inspect_ai/agent/_handoff.py +93 -0
- inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
- inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
- inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
- inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
- inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
- inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
- inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
- inspect_ai/agent/_react.py +241 -0
- inspect_ai/agent/_run.py +36 -0
- inspect_ai/agent/_types.py +81 -0
- inspect_ai/log/_log.py +11 -2
- inspect_ai/log/_transcript.py +13 -9
- inspect_ai/model/__init__.py +7 -1
- inspect_ai/model/_call_tools.py +256 -52
- inspect_ai/model/_chat_message.py +7 -4
- inspect_ai/model/_conversation.py +13 -62
- inspect_ai/model/_display.py +85 -0
- inspect_ai/model/_model.py +113 -14
- inspect_ai/model/_model_output.py +14 -9
- inspect_ai/model/_openai.py +16 -4
- inspect_ai/model/_openai_computer_use.py +162 -0
- inspect_ai/model/_openai_responses.py +319 -165
- inspect_ai/model/_providers/anthropic.py +20 -21
- inspect_ai/model/_providers/azureai.py +24 -13
- inspect_ai/model/_providers/bedrock.py +1 -7
- inspect_ai/model/_providers/cloudflare.py +3 -3
- inspect_ai/model/_providers/goodfire.py +2 -6
- inspect_ai/model/_providers/google.py +11 -10
- inspect_ai/model/_providers/groq.py +6 -3
- inspect_ai/model/_providers/hf.py +7 -3
- inspect_ai/model/_providers/mistral.py +7 -10
- inspect_ai/model/_providers/openai.py +47 -17
- inspect_ai/model/_providers/openai_o1.py +11 -4
- inspect_ai/model/_providers/openai_responses.py +12 -14
- inspect_ai/model/_providers/providers.py +2 -2
- inspect_ai/model/_providers/together.py +12 -2
- inspect_ai/model/_providers/util/chatapi.py +7 -2
- inspect_ai/model/_providers/util/hf_handler.py +4 -2
- inspect_ai/model/_providers/util/llama31.py +4 -2
- inspect_ai/model/_providers/vertex.py +11 -9
- inspect_ai/model/_providers/vllm.py +4 -4
- inspect_ai/scorer/__init__.py +2 -0
- inspect_ai/scorer/_metrics/__init__.py +2 -0
- inspect_ai/scorer/_metrics/grouped.py +84 -0
- inspect_ai/scorer/_score.py +26 -6
- inspect_ai/solver/__init__.py +2 -2
- inspect_ai/solver/_basic_agent.py +22 -9
- inspect_ai/solver/_bridge.py +31 -0
- inspect_ai/solver/_chain.py +20 -12
- inspect_ai/solver/_fork.py +5 -1
- inspect_ai/solver/_human_agent.py +52 -0
- inspect_ai/solver/_prompt.py +3 -1
- inspect_ai/solver/_run.py +59 -0
- inspect_ai/solver/_solver.py +14 -4
- inspect_ai/solver/_task_state.py +5 -3
- inspect_ai/tool/_tool_call.py +15 -8
- inspect_ai/tool/_tool_def.py +17 -12
- inspect_ai/tool/_tool_support_helpers.py +2 -2
- inspect_ai/tool/_tool_with.py +14 -11
- inspect_ai/tool/_tools/_bash_session.py +11 -2
- inspect_ai/tool/_tools/_computer/_common.py +18 -2
- inspect_ai/tool/_tools/_computer/_computer.py +18 -2
- inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
- inspect_ai/tool/_tools/_think.py +1 -1
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +100 -61
- inspect_ai/util/__init__.py +2 -0
- inspect_ai/util/_anyio.py +27 -0
- inspect_ai/util/_sandbox/__init__.py +2 -1
- inspect_ai/util/_sandbox/context.py +32 -7
- inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
- inspect_ai/util/_sandbox/docker/compose.py +2 -2
- inspect_ai/util/_sandbox/docker/docker.py +12 -1
- inspect_ai/util/_store_model.py +30 -7
- inspect_ai/util/_subprocess.py +13 -3
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/RECORD +179 -153
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -167
- /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,77 @@
|
|
1
|
+
import clsx from "clsx";
|
2
|
+
import { FC, ReactNode } from "react";
|
3
|
+
import styles from "./Modal.module.css";
|
4
|
+
|
5
|
+
interface ModalProps {
|
6
|
+
id: string;
|
7
|
+
showing: boolean;
|
8
|
+
setShowing: (showing: boolean) => void;
|
9
|
+
title?: string;
|
10
|
+
children: ReactNode;
|
11
|
+
className?: string | string[];
|
12
|
+
}
|
13
|
+
|
14
|
+
export const Modal: FC<ModalProps> = ({
|
15
|
+
id,
|
16
|
+
title,
|
17
|
+
showing,
|
18
|
+
setShowing,
|
19
|
+
children,
|
20
|
+
className,
|
21
|
+
}) => {
|
22
|
+
return (
|
23
|
+
<>
|
24
|
+
{showing && (
|
25
|
+
<div className={styles.backdrop} onClick={() => setShowing(false)} />
|
26
|
+
)}
|
27
|
+
<div
|
28
|
+
id={id}
|
29
|
+
className={clsx("modal", "fade", showing ? "show" : "", className)}
|
30
|
+
tabIndex={-1}
|
31
|
+
style={{ display: showing ? "block" : "none" }}
|
32
|
+
>
|
33
|
+
<div className={clsx("modal-dialog", styles.modal)}>
|
34
|
+
<div className="modal-content">
|
35
|
+
<div className={clsx("modal-header", styles.header)}>
|
36
|
+
<div
|
37
|
+
className={clsx(
|
38
|
+
"modal-title",
|
39
|
+
"text-size-base",
|
40
|
+
styles.modalTitle,
|
41
|
+
)}
|
42
|
+
>
|
43
|
+
{title}
|
44
|
+
</div>
|
45
|
+
<button
|
46
|
+
type="button"
|
47
|
+
className={clsx(
|
48
|
+
"btn-close",
|
49
|
+
"text-size-smaller",
|
50
|
+
styles.btnClose,
|
51
|
+
)}
|
52
|
+
data-bs-dismiss="modal"
|
53
|
+
aria-label="Close"
|
54
|
+
onClick={() => {
|
55
|
+
setShowing(!showing);
|
56
|
+
}}
|
57
|
+
></button>
|
58
|
+
</div>
|
59
|
+
<div className="modal-body">{children}</div>
|
60
|
+
<div className="modal-footer">
|
61
|
+
<button
|
62
|
+
type="button"
|
63
|
+
className="btn btn-secondary"
|
64
|
+
data-bs-dismiss="modal"
|
65
|
+
onClick={() => {
|
66
|
+
setShowing(!showing);
|
67
|
+
}}
|
68
|
+
>
|
69
|
+
Close
|
70
|
+
</button>
|
71
|
+
</div>
|
72
|
+
</div>
|
73
|
+
</div>
|
74
|
+
</div>
|
75
|
+
</>
|
76
|
+
);
|
77
|
+
};
|
@@ -1,7 +1,7 @@
|
|
1
1
|
import clsx from "clsx";
|
2
2
|
import { FC } from "react";
|
3
|
-
import {
|
4
|
-
import styles from "./
|
3
|
+
import { MetaDataGrid } from "../metadata/MetaDataGrid";
|
4
|
+
import styles from "./DetailStep.module.css";
|
5
5
|
|
6
6
|
interface DetailStepProps {
|
7
7
|
icon?: string;
|
@@ -22,7 +22,10 @@ export const DetailStep: FC<DetailStepProps> = ({
|
|
22
22
|
{iconHtml} {name}
|
23
23
|
<div className={styles.container}>
|
24
24
|
{params ? (
|
25
|
-
<
|
25
|
+
<MetaDataGrid
|
26
|
+
entries={params}
|
27
|
+
className={clsx("text-size-small", styles.metadata)}
|
28
|
+
/>
|
26
29
|
) : (
|
27
30
|
""
|
28
31
|
)}
|
@@ -25,8 +25,15 @@ export const InlineSampleDisplay: FC<InlineSampleDisplayProps> = ({
|
|
25
25
|
// Sample hooks
|
26
26
|
const sampleData = useSampleData();
|
27
27
|
const loadSample = useStore((state) => state.sampleActions.loadSample);
|
28
|
+
const pollSample = useStore((state) => state.sampleActions.pollSample);
|
28
29
|
const logSelection = useLogSelection();
|
29
30
|
|
31
|
+
useEffect(() => {
|
32
|
+
if (sampleData.running && logSelection.logFile && logSelection.sample) {
|
33
|
+
pollSample(logSelection.logFile, logSelection.sample);
|
34
|
+
}
|
35
|
+
}, []);
|
36
|
+
|
30
37
|
// Sample Loading
|
31
38
|
const prevCompleted = usePrevious(
|
32
39
|
logSelection.sample?.completed !== undefined
|
@@ -39,8 +39,15 @@ export const SampleDialog: FC<SampleDialogProps> = ({
|
|
39
39
|
// Sample hooks
|
40
40
|
const sampleData = useSampleData();
|
41
41
|
const loadSample = useStore((state) => state.sampleActions.loadSample);
|
42
|
+
const pollSample = useStore((state) => state.sampleActions.pollSample);
|
42
43
|
const logSelection = useLogSelection();
|
43
44
|
|
45
|
+
useEffect(() => {
|
46
|
+
if (sampleData.running && logSelection.logFile && logSelection.sample) {
|
47
|
+
pollSample(logSelection.logFile, logSelection.sample);
|
48
|
+
}
|
49
|
+
}, []);
|
50
|
+
|
44
51
|
// Load sample
|
45
52
|
const prevCompleted = usePrevious(
|
46
53
|
logSelection.sample?.completed !== undefined
|
@@ -7,7 +7,6 @@ import { isVscode } from "../utils/vscode";
|
|
7
7
|
import { ApplicationIcons } from "../appearance/icons";
|
8
8
|
import { ANSIDisplay } from "../components/AnsiDisplay";
|
9
9
|
import { ToolButton } from "../components/ToolButton";
|
10
|
-
import { SampleScoreView } from "./scores/SampleScoreView";
|
11
10
|
|
12
11
|
import clsx from "clsx";
|
13
12
|
import {
|
@@ -40,6 +39,7 @@ import { ChatViewVirtualList } from "./chat/ChatViewVirtualList";
|
|
40
39
|
import { messagesFromEvents } from "./chat/messages";
|
41
40
|
import styles from "./SampleDisplay.module.css";
|
42
41
|
import { SampleSummaryView } from "./SampleSummaryView";
|
42
|
+
import { SampleScoresView } from "./scores/SampleScoresView";
|
43
43
|
import { TranscriptVirtualList } from "./transcript/TranscriptView";
|
44
44
|
|
45
45
|
interface SampleDisplayProps {
|
@@ -92,7 +92,6 @@ export const SampleDisplay: FC<SampleDisplayProps> = ({
|
|
92
92
|
return false;
|
93
93
|
};
|
94
94
|
|
95
|
-
const scorerNames = Object.keys(sample?.scores || {});
|
96
95
|
const sampleMetadatas = metadataViewsForSample(`${baseId}-${id}`, sample);
|
97
96
|
|
98
97
|
const tabsetId = `task-sample-details-tab-${id}`;
|
@@ -166,38 +165,16 @@ export const SampleDisplay: FC<SampleDisplayProps> = ({
|
|
166
165
|
running={running}
|
167
166
|
/>
|
168
167
|
</TabPanel>
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
</TabPanel>
|
180
|
-
) : (
|
181
|
-
<>
|
182
|
-
{sample
|
183
|
-
? Object.keys(sample?.scores || {}).map((scorer) => {
|
184
|
-
const tabId = `score-${scorer}`;
|
185
|
-
return (
|
186
|
-
<TabPanel
|
187
|
-
key={tabId}
|
188
|
-
id={tabId}
|
189
|
-
className="sample-tab"
|
190
|
-
title={scorer}
|
191
|
-
onSelected={onSelectedTab}
|
192
|
-
selected={selectedTab === tabId}
|
193
|
-
>
|
194
|
-
<SampleScoreView sample={sample} scorer={scorer} />
|
195
|
-
</TabPanel>
|
196
|
-
);
|
197
|
-
})
|
198
|
-
: undefined}
|
199
|
-
</>
|
200
|
-
)}
|
168
|
+
<TabPanel
|
169
|
+
key={kSampleScoringTabId}
|
170
|
+
id={kSampleScoringTabId}
|
171
|
+
className="sample-tab"
|
172
|
+
title="Scoring"
|
173
|
+
onSelected={onSelectedTab}
|
174
|
+
selected={selectedTab === kSampleScoringTabId}
|
175
|
+
>
|
176
|
+
<SampleScoresView sample={sample} />
|
177
|
+
</TabPanel>
|
201
178
|
<TabPanel
|
202
179
|
id={kSampleMetdataTabId}
|
203
180
|
className={clsx("sample-tab")}
|
@@ -123,7 +123,7 @@ export const SampleSummaryView: FC<SampleSummaryViewProps> = ({
|
|
123
123
|
|
124
124
|
columns.push({
|
125
125
|
label: "Input",
|
126
|
-
value: fields.input
|
126
|
+
value: <MarkdownDiv markdown={fields.input.join(" ")} />,
|
127
127
|
size: `${input}fr`,
|
128
128
|
clamp: true,
|
129
129
|
});
|
@@ -233,7 +233,7 @@ export const SampleSummaryView: FC<SampleSummaryViewProps> = ({
|
|
233
233
|
styles.value,
|
234
234
|
styles.wrap,
|
235
235
|
col.clamp ? "three-line-clamp" : undefined,
|
236
|
-
col.center ? styles.
|
236
|
+
col.center ? styles.centerValue : undefined,
|
237
237
|
)}
|
238
238
|
>
|
239
239
|
{col.value}
|
@@ -44,3 +44,15 @@ export const SampleTools: FC<SampleToolsProps> = ({ samples }) => {
|
|
44
44
|
</Fragment>
|
45
45
|
);
|
46
46
|
};
|
47
|
+
|
48
|
+
interface ScoreFilterToolsProps {}
|
49
|
+
|
50
|
+
export const ScoreFilterTools: FC<ScoreFilterToolsProps> = () => {
|
51
|
+
const scores = useScores();
|
52
|
+
const score = useScore();
|
53
|
+
const setScore = useStore((state) => state.logActions.setScore);
|
54
|
+
if (scores.length <= 1) {
|
55
|
+
return undefined;
|
56
|
+
}
|
57
|
+
return <SelectScorer scores={scores} score={score} setScore={setScore} />;
|
58
|
+
};
|
@@ -52,6 +52,7 @@ export const MessageContent: FC<MessageContentProps> = ({ contents }) => {
|
|
52
52
|
{
|
53
53
|
type: "text",
|
54
54
|
text: content,
|
55
|
+
refusal: null,
|
55
56
|
},
|
56
57
|
index === contents.length - 1,
|
57
58
|
);
|
@@ -75,6 +76,7 @@ export const MessageContent: FC<MessageContentProps> = ({ contents }) => {
|
|
75
76
|
const contentText: ContentText = {
|
76
77
|
type: "text",
|
77
78
|
text: contents,
|
79
|
+
refusal: null,
|
78
80
|
};
|
79
81
|
return messageRenderers["text"].render(
|
80
82
|
"text-message-content",
|
@@ -101,6 +101,7 @@ const resolveToolMessage = (toolMessage?: ChatMessageTool): ContentTool[] => {
|
|
101
101
|
{
|
102
102
|
type: "text",
|
103
103
|
text: content,
|
104
|
+
refusal: null,
|
104
105
|
},
|
105
106
|
],
|
106
107
|
},
|
@@ -115,6 +116,7 @@ const resolveToolMessage = (toolMessage?: ChatMessageTool): ContentTool[] => {
|
|
115
116
|
{
|
116
117
|
type: "text",
|
117
118
|
text: con,
|
119
|
+
refusal: null,
|
118
120
|
},
|
119
121
|
],
|
120
122
|
} as ContentTool;
|
@@ -70,6 +70,7 @@ export const resolveMessages = (messages: Messages) => {
|
|
70
70
|
role: "system",
|
71
71
|
content: systemContent,
|
72
72
|
source: "input",
|
73
|
+
internal: null,
|
73
74
|
};
|
74
75
|
|
75
76
|
// Converge them
|
@@ -120,6 +121,7 @@ const normalizeContent = (
|
|
120
121
|
return {
|
121
122
|
type: "text",
|
122
123
|
text: content,
|
124
|
+
refusal: null,
|
123
125
|
};
|
124
126
|
} else {
|
125
127
|
return content;
|
@@ -151,7 +153,7 @@ export const messagesFromEvents = (runningEvents: Events): Messages => {
|
|
151
153
|
}
|
152
154
|
});
|
153
155
|
|
154
|
-
if (messages.
|
156
|
+
if (messages.size > 0) {
|
155
157
|
return messages.values().toArray();
|
156
158
|
} else {
|
157
159
|
return [];
|
@@ -47,7 +47,13 @@ export const createEvalDescriptor = (
|
|
47
47
|
sample.scores[scoreLabel.scorer] &&
|
48
48
|
sample.scores[scoreLabel.scorer].value
|
49
49
|
) {
|
50
|
-
|
50
|
+
if (typeof sample.scores[scoreLabel.scorer].value === "object") {
|
51
|
+
return (
|
52
|
+
sample.scores[scoreLabel.scorer].value as Record<string, Value2>
|
53
|
+
)[scoreLabel.name];
|
54
|
+
} else {
|
55
|
+
return sample.scores[scoreLabel.scorer].value;
|
56
|
+
}
|
51
57
|
} else if (sample.scores[scoreLabel.name]) {
|
52
58
|
return sample.scores[scoreLabel.name].value;
|
53
59
|
} else {
|
@@ -162,7 +168,7 @@ export const createEvalDescriptor = (
|
|
162
168
|
return "null";
|
163
169
|
} else if (score === undefined) {
|
164
170
|
return "";
|
165
|
-
} else if (
|
171
|
+
} else if (descriptor && descriptor.render) {
|
166
172
|
return descriptor.render(score);
|
167
173
|
} else {
|
168
174
|
return <span>{String(score)}</span>;
|
@@ -328,7 +334,7 @@ export const createSamplesDescriptor = (
|
|
328
334
|
answer: Math.min(sizes[2], 300),
|
329
335
|
limit: Math.min(sizes[3], 50),
|
330
336
|
id: Math.min(sizes[4], 10),
|
331
|
-
score: Math.min(sizes[
|
337
|
+
score: Math.min(sizes[5], 30),
|
332
338
|
};
|
333
339
|
const base =
|
334
340
|
maxSizes.input +
|
@@ -1,11 +1,11 @@
|
|
1
1
|
.circle {
|
2
2
|
font-family: "Consola Regular";
|
3
|
-
width:
|
4
|
-
height:
|
3
|
+
width: 40px;
|
4
|
+
height: 30px;
|
5
5
|
display: inline-flex;
|
6
6
|
justify-content: center;
|
7
7
|
align-items: center;
|
8
|
-
border-radius:
|
8
|
+
border-radius: 15px;
|
9
9
|
padding-top: 1px;
|
10
10
|
}
|
11
11
|
|
@@ -35,7 +35,7 @@ export const objectScoreDescriptor = (values: Value2[]): ScoreDescriptor => {
|
|
35
35
|
|
36
36
|
const scores: JSX.Element[] = [];
|
37
37
|
const keys = Object.keys(score);
|
38
|
-
keys.forEach((key
|
38
|
+
keys.forEach((key) => {
|
39
39
|
if (typeof score !== "object" || Array.isArray(score)) {
|
40
40
|
throw new Error(
|
41
41
|
"Unexpected us of object score descriptor for non-score object",
|
@@ -50,23 +50,22 @@ export const objectScoreDescriptor = (values: Value2[]): ScoreDescriptor => {
|
|
50
50
|
: parseFloat(value === true ? "1" : value),
|
51
51
|
)
|
52
52
|
: String(value);
|
53
|
+
|
53
54
|
scores.push(
|
54
|
-
|
55
|
-
key={`score-value-${index}`}
|
56
|
-
className={clsx(
|
57
|
-
styles.container,
|
58
|
-
index + 1 < keys.length ? styles.padded : undefined,
|
59
|
-
)}
|
60
|
-
>
|
55
|
+
<>
|
61
56
|
<div className={clsx(styles.key, "text-size-smaller")}>{key}</div>
|
62
|
-
<div className={clsx(styles.value, "text-size-
|
57
|
+
<div className={clsx(styles.value, "text-size-base")}>
|
63
58
|
{formattedValue}
|
64
59
|
</div>
|
65
|
-
|
60
|
+
</>,
|
66
61
|
);
|
67
62
|
});
|
68
63
|
|
69
|
-
return
|
64
|
+
return (
|
65
|
+
<div key={`score-value`} className={clsx(styles.container)}>
|
66
|
+
{scores}
|
67
|
+
</div>
|
68
|
+
);
|
70
69
|
},
|
71
70
|
};
|
72
71
|
};
|
@@ -1,5 +1,6 @@
|
|
1
1
|
interface SampleFooterProps {
|
2
2
|
sampleCount: number;
|
3
|
+
totalSampleCount: number;
|
3
4
|
running: boolean;
|
4
5
|
}
|
5
6
|
|
@@ -9,6 +10,7 @@ import styles from "./SampleFooter.module.css";
|
|
9
10
|
|
10
11
|
export const SampleFooter: FC<SampleFooterProps> = ({
|
11
12
|
sampleCount,
|
13
|
+
totalSampleCount,
|
12
14
|
running,
|
13
15
|
}) => {
|
14
16
|
return (
|
@@ -28,7 +30,11 @@ export const SampleFooter: FC<SampleFooterProps> = ({
|
|
28
30
|
</div>
|
29
31
|
) : undefined}
|
30
32
|
</div>
|
31
|
-
<div>
|
33
|
+
<div>
|
34
|
+
{sampleCount < totalSampleCount
|
35
|
+
? `${sampleCount} / ${totalSampleCount} Samples`
|
36
|
+
: `${sampleCount} Samples`}
|
37
|
+
</div>
|
32
38
|
</div>
|
33
39
|
);
|
34
40
|
};
|
@@ -29,6 +29,7 @@ const kSeparatorHeight = 24;
|
|
29
29
|
|
30
30
|
interface SampleListProps {
|
31
31
|
items: ListItem[];
|
32
|
+
totalItemCount: number;
|
32
33
|
running: boolean;
|
33
34
|
nextSample: () => void;
|
34
35
|
prevSample: () => void;
|
@@ -37,9 +38,12 @@ interface SampleListProps {
|
|
37
38
|
listHandle: RefObject<VirtuosoHandle | null>;
|
38
39
|
}
|
39
40
|
|
41
|
+
export const kSampleFollowProp = "sample-list";
|
42
|
+
|
40
43
|
export const SampleList: FC<SampleListProps> = memo((props) => {
|
41
44
|
const {
|
42
45
|
items,
|
46
|
+
totalItemCount,
|
43
47
|
running,
|
44
48
|
nextSample,
|
45
49
|
prevSample,
|
@@ -57,9 +61,13 @@ export const SampleList: FC<SampleListProps> = memo((props) => {
|
|
57
61
|
(state) => state.log.selectedSampleIndex,
|
58
62
|
);
|
59
63
|
const samplesDescriptor = useSampleDescriptor();
|
60
|
-
const [followOutput, setFollowOutput] = useProperty(
|
61
|
-
|
62
|
-
|
64
|
+
const [followOutput, setFollowOutput] = useProperty(
|
65
|
+
kSampleFollowProp,
|
66
|
+
"follow",
|
67
|
+
{
|
68
|
+
defaultValue: !!running,
|
69
|
+
},
|
70
|
+
);
|
63
71
|
|
64
72
|
// Track whether we were previously running so we can
|
65
73
|
// decide whether to pop up to the top
|
@@ -84,13 +92,15 @@ export const SampleList: FC<SampleListProps> = memo((props) => {
|
|
84
92
|
prevRunningRef.current = running;
|
85
93
|
}, [running, followOutput, listHandle]);
|
86
94
|
|
95
|
+
const loaded = useRef(false);
|
87
96
|
const handleAtBottomStateChange = useCallback(
|
88
97
|
(atBottom: boolean) => {
|
89
|
-
if (running) {
|
98
|
+
if (loaded.current && running) {
|
90
99
|
setFollowOutput(atBottom);
|
91
100
|
}
|
101
|
+
loaded.current = true;
|
92
102
|
},
|
93
|
-
[running, setFollowOutput],
|
103
|
+
[running, setFollowOutput, followOutput],
|
94
104
|
);
|
95
105
|
|
96
106
|
const onkeydown = useCallback(
|
@@ -148,7 +158,7 @@ export const SampleList: FC<SampleListProps> = memo((props) => {
|
|
148
158
|
return null;
|
149
159
|
}
|
150
160
|
},
|
151
|
-
[showSample],
|
161
|
+
[showSample, gridColumnsTemplate],
|
152
162
|
);
|
153
163
|
|
154
164
|
const { input, limit, answer, target } = gridColumns(samplesDescriptor);
|
@@ -210,8 +220,11 @@ export const SampleList: FC<SampleListProps> = memo((props) => {
|
|
210
220
|
data={items}
|
211
221
|
defaultItemHeight={50}
|
212
222
|
itemContent={renderRow}
|
213
|
-
followOutput={
|
223
|
+
followOutput={(_atBottom: boolean) => {
|
224
|
+
return followOutput;
|
225
|
+
}}
|
214
226
|
atBottomStateChange={handleAtBottomStateChange}
|
227
|
+
atBottomThreshold={30}
|
215
228
|
increaseViewportBy={{ top: 300, bottom: 300 }}
|
216
229
|
overscan={{
|
217
230
|
main: 10,
|
@@ -223,7 +236,11 @@ export const SampleList: FC<SampleListProps> = memo((props) => {
|
|
223
236
|
isScrolling={isScrolling}
|
224
237
|
restoreStateFrom={getRestoreState()}
|
225
238
|
/>
|
226
|
-
<SampleFooter
|
239
|
+
<SampleFooter
|
240
|
+
sampleCount={sampleCount}
|
241
|
+
totalSampleCount={totalItemCount}
|
242
|
+
running={running}
|
243
|
+
/>
|
227
244
|
</div>
|
228
245
|
);
|
229
246
|
});
|
@@ -69,7 +69,7 @@ export const SampleRow: FC<SampleRowProps> = ({
|
|
69
69
|
styles.wrapAnywhere,
|
70
70
|
)}
|
71
71
|
>
|
72
|
-
{inputString(sample.input).join(" ")}
|
72
|
+
<MarkdownDiv markdown={inputString(sample.input).join(" ")} />
|
73
73
|
</div>
|
74
74
|
<div className={clsx("sample-target", "three-line-clamp", styles.cell)}>
|
75
75
|
<MarkdownDiv
|
@@ -1,8 +1,7 @@
|
|
1
|
-
import { FC
|
1
|
+
import { FC } from "react";
|
2
2
|
import { SampleSummary } from "../../api/types";
|
3
3
|
|
4
|
-
import {
|
5
|
-
import styles from "./SampleScores.module.css";
|
4
|
+
import { getScoreDescriptorForValues } from "../descriptor/score/ScoreDescriptor";
|
6
5
|
|
7
6
|
interface SampleScoresProps {
|
8
7
|
sample: SampleSummary;
|
@@ -10,24 +9,14 @@ interface SampleScoresProps {
|
|
10
9
|
}
|
11
10
|
|
12
11
|
export const SampleScores: FC<SampleScoresProps> = ({ sample, scorer }) => {
|
13
|
-
const
|
14
|
-
|
15
|
-
|
16
|
-
.scorerDescriptor(sample, { scorer, name: scorer })
|
17
|
-
.scores()
|
18
|
-
: samplesDescriptor?.selectedScorerDescriptor(sample)?.scores();
|
19
|
-
|
20
|
-
if (scores?.length === 1) {
|
21
|
-
return scores[0].rendered();
|
22
|
-
} else {
|
23
|
-
const rows = scores?.map((score) => {
|
24
|
-
return (
|
25
|
-
<Fragment>
|
26
|
-
<div style={{ opacity: "0.7" }}>{score.name}</div>
|
27
|
-
<div>{score.rendered()}</div>
|
28
|
-
</Fragment>
|
29
|
-
);
|
30
|
-
});
|
31
|
-
return <div className={styles.grid}>{rows}</div>;
|
12
|
+
const scoreData = sample.scores?.[scorer];
|
13
|
+
if (!scoreData) {
|
14
|
+
return undefined;
|
32
15
|
}
|
16
|
+
|
17
|
+
const scorerDescriptor = getScoreDescriptorForValues(
|
18
|
+
[scoreData.value],
|
19
|
+
[typeof scoreData.value],
|
20
|
+
);
|
21
|
+
return scorerDescriptor?.render(scoreData.value);
|
33
22
|
};
|