inspect-ai 0.3.80__py3-none-any.whl → 0.3.82__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +35 -2
- inspect_ai/_cli/util.py +44 -1
- inspect_ai/_display/core/config.py +1 -1
- inspect_ai/_display/core/display.py +13 -4
- inspect_ai/_display/core/results.py +1 -1
- inspect_ai/_display/textual/widgets/task_detail.py +5 -4
- inspect_ai/_eval/eval.py +38 -1
- inspect_ai/_eval/evalset.py +5 -0
- inspect_ai/_eval/run.py +5 -2
- inspect_ai/_eval/task/log.py +53 -6
- inspect_ai/_eval/task/run.py +51 -10
- inspect_ai/_util/constants.py +2 -0
- inspect_ai/_util/file.py +17 -1
- inspect_ai/_util/json.py +36 -1
- inspect_ai/_view/server.py +113 -1
- inspect_ai/_view/www/App.css +1 -1
- inspect_ai/_view/www/dist/assets/index.css +518 -296
- inspect_ai/_view/www/dist/assets/index.js +38803 -36307
- inspect_ai/_view/www/eslint.config.mjs +1 -1
- inspect_ai/_view/www/log-schema.json +13 -0
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
- inspect_ai/_view/www/package.json +8 -2
- inspect_ai/_view/www/src/App.tsx +151 -855
- inspect_ai/_view/www/src/api/api-browser.ts +176 -5
- inspect_ai/_view/www/src/api/api-vscode.ts +75 -1
- inspect_ai/_view/www/src/api/client-api.ts +66 -10
- inspect_ai/_view/www/src/api/jsonrpc.ts +2 -0
- inspect_ai/_view/www/src/api/types.ts +107 -2
- inspect_ai/_view/www/src/appearance/icons.ts +1 -0
- inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +3 -3
- inspect_ai/_view/www/src/components/DownloadPanel.tsx +2 -2
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +56 -61
- inspect_ai/_view/www/src/components/FindBand.tsx +17 -9
- inspect_ai/_view/www/src/components/HumanBaselineView.tsx +1 -1
- inspect_ai/_view/www/src/components/JsonPanel.tsx +14 -24
- inspect_ai/_view/www/src/components/LargeModal.tsx +2 -35
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +27 -11
- inspect_ai/_view/www/src/components/LiveVirtualList.module.css +11 -0
- inspect_ai/_view/www/src/components/LiveVirtualList.tsx +177 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +3 -3
- inspect_ai/_view/www/src/components/MessageBand.tsx +14 -9
- inspect_ai/_view/www/src/components/MorePopOver.tsx +3 -3
- inspect_ai/_view/www/src/components/NavPills.tsx +20 -8
- inspect_ai/_view/www/src/components/NoContentsPanel.module.css +12 -0
- inspect_ai/_view/www/src/components/NoContentsPanel.tsx +20 -0
- inspect_ai/_view/www/src/components/ProgressBar.module.css +5 -4
- inspect_ai/_view/www/src/components/ProgressBar.tsx +3 -2
- inspect_ai/_view/www/src/components/PulsingDots.module.css +81 -0
- inspect_ai/_view/www/src/components/PulsingDots.tsx +45 -0
- inspect_ai/_view/www/src/components/TabSet.tsx +4 -37
- inspect_ai/_view/www/src/components/ToolButton.tsx +3 -4
- inspect_ai/_view/www/src/index.tsx +26 -94
- inspect_ai/_view/www/src/logfile/remoteLogFile.ts +9 -1
- inspect_ai/_view/www/src/logfile/remoteZipFile.ts +30 -4
- inspect_ai/_view/www/src/metadata/RenderedContent.tsx +4 -6
- inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +1 -1
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +9 -1
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +67 -28
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +51 -22
- inspect_ai/_view/www/src/samples/SampleDisplay.module.css +4 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +144 -90
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +4 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +82 -35
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +23 -30
- inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +1 -1
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +45 -53
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +4 -1
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +3 -0
- inspect_ai/_view/www/src/samples/chat/messages.ts +34 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +3 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +10 -1
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +22 -46
- inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +25 -17
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +2 -1
- inspect_ai/_view/www/src/samples/descriptor/types.ts +6 -5
- inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +21 -3
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +20 -1
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +105 -85
- inspect_ai/_view/www/src/samples/list/SampleRow.module.css +6 -0
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +27 -14
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +29 -18
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +28 -28
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +19 -9
- inspect_ai/_view/www/src/samples/sampleDataAdapter.ts +33 -0
- inspect_ai/_view/www/src/samples/sampleLimit.ts +2 -2
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +7 -9
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +7 -11
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +4 -0
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +10 -24
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -22
- inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +15 -24
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +0 -13
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +6 -28
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +24 -34
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +4 -0
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +8 -13
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +197 -338
- inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.module.css +16 -0
- inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.tsx +44 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +7 -4
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +52 -58
- inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.module.css +23 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.tsx +27 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +30 -1
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +102 -72
- inspect_ai/_view/www/src/scoring/utils.ts +87 -0
- inspect_ai/_view/www/src/state/appSlice.ts +244 -0
- inspect_ai/_view/www/src/state/hooks.ts +397 -0
- inspect_ai/_view/www/src/state/logPolling.ts +196 -0
- inspect_ai/_view/www/src/state/logSlice.ts +214 -0
- inspect_ai/_view/www/src/state/logsPolling.ts +118 -0
- inspect_ai/_view/www/src/state/logsSlice.ts +181 -0
- inspect_ai/_view/www/src/state/samplePolling.ts +311 -0
- inspect_ai/_view/www/src/state/sampleSlice.ts +127 -0
- inspect_ai/_view/www/src/state/sampleUtils.ts +21 -0
- inspect_ai/_view/www/src/state/scrolling.ts +206 -0
- inspect_ai/_view/www/src/state/store.ts +168 -0
- inspect_ai/_view/www/src/state/store_filter.ts +84 -0
- inspect_ai/_view/www/src/state/utils.ts +23 -0
- inspect_ai/_view/www/src/storage/index.ts +26 -0
- inspect_ai/_view/www/src/types/log.d.ts +2 -0
- inspect_ai/_view/www/src/types.ts +94 -32
- inspect_ai/_view/www/src/utils/attachments.ts +58 -23
- inspect_ai/_view/www/src/utils/logger.ts +52 -0
- inspect_ai/_view/www/src/utils/polling.ts +100 -0
- inspect_ai/_view/www/src/utils/react.ts +30 -0
- inspect_ai/_view/www/src/utils/vscode.ts +1 -1
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +181 -216
- inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +11 -53
- inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +8 -18
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -0
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +40 -22
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +0 -1
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +98 -39
- inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.module.css +32 -0
- inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.tsx +32 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +11 -13
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +6 -2
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +4 -4
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +28 -13
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +5 -10
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +4 -4
- inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.module.css +22 -0
- inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.tsx +19 -0
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +110 -115
- inspect_ai/_view/www/src/workspace/tabs/grouping.ts +37 -5
- inspect_ai/_view/www/src/workspace/tabs/types.ts +4 -0
- inspect_ai/_view/www/src/workspace/types.ts +4 -3
- inspect_ai/_view/www/src/workspace/utils.ts +4 -4
- inspect_ai/_view/www/vite.config.js +6 -0
- inspect_ai/_view/www/yarn.lock +370 -354
- inspect_ai/log/_condense.py +26 -0
- inspect_ai/log/_log.py +6 -3
- inspect_ai/log/_recorders/buffer/__init__.py +14 -0
- inspect_ai/log/_recorders/buffer/buffer.py +30 -0
- inspect_ai/log/_recorders/buffer/database.py +685 -0
- inspect_ai/log/_recorders/buffer/filestore.py +259 -0
- inspect_ai/log/_recorders/buffer/types.py +84 -0
- inspect_ai/log/_recorders/eval.py +2 -11
- inspect_ai/log/_recorders/types.py +30 -0
- inspect_ai/log/_transcript.py +27 -1
- inspect_ai/model/_call_tools.py +1 -0
- inspect_ai/model/_generate_config.py +2 -2
- inspect_ai/model/_model.py +1 -0
- inspect_ai/tool/_tool_support_helpers.py +4 -4
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +3 -1
- inspect_ai/util/_subtask.py +1 -0
- {inspect_ai-0.3.80.dist-info → inspect_ai-0.3.82.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.80.dist-info → inspect_ai-0.3.82.dist-info}/RECORD +178 -138
- inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +0 -22
- {inspect_ai-0.3.80.dist-info → inspect_ai-0.3.82.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.80.dist-info → inspect_ai-0.3.82.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.80.dist-info → inspect_ai-0.3.82.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.80.dist-info → inspect_ai-0.3.82.dist-info}/top_level.txt +0 -0
@@ -1,17 +1,18 @@
|
|
1
1
|
import clsx from "clsx";
|
2
2
|
import { MarkdownDiv } from "../components/MarkdownDiv";
|
3
|
-
import { EvalSample, WorkingTime } from "../types/log";
|
3
|
+
import { EvalSample, Target, TotalTime, WorkingTime } from "../types/log";
|
4
4
|
import { arrayToString, formatTime, inputString } from "../utils/format";
|
5
|
-
import { SamplesDescriptor } from "./descriptor/samplesDescriptor";
|
6
5
|
import { FlatSampleError } from "./error/FlatSampleErrorView";
|
7
6
|
|
8
7
|
import { FC, ReactNode } from "react";
|
8
|
+
import { SampleSummary } from "../api/types";
|
9
|
+
import { useSampleDescriptor, useScore } from "../state/hooks";
|
9
10
|
import styles from "./SampleSummaryView.module.css";
|
11
|
+
import { SamplesDescriptor } from "./descriptor/samplesDescriptor";
|
10
12
|
|
11
13
|
interface SampleSummaryViewProps {
|
12
14
|
parent_id: string;
|
13
|
-
sample: EvalSample;
|
14
|
-
sampleDescriptor: SamplesDescriptor;
|
15
|
+
sample: SampleSummary | EvalSample;
|
15
16
|
}
|
16
17
|
|
17
18
|
interface SummaryColumn {
|
@@ -23,14 +24,73 @@ interface SummaryColumn {
|
|
23
24
|
title?: string;
|
24
25
|
}
|
25
26
|
|
27
|
+
interface SampleFields {
|
28
|
+
id: string | number;
|
29
|
+
input: string[];
|
30
|
+
target: Target;
|
31
|
+
answer?: string;
|
32
|
+
limit?: string;
|
33
|
+
working_time?: WorkingTime;
|
34
|
+
total_time?: TotalTime;
|
35
|
+
error?: string;
|
36
|
+
}
|
37
|
+
|
38
|
+
/**
 * Type guard distinguishing a full `EvalSample` from a `SampleSummary`.
 *
 * A value is treated as a full sample only when it has a `choices` property
 * whose value is an array; anything else is treated as a summary.
 */
function isEvalSample(
  sample: SampleSummary | EvalSample,
): sample is EvalSample {
  if (!("choices" in sample)) {
    return false;
  }
  const { choices } = sample as EvalSample;
  return Array.isArray(choices);
}
|
43
|
+
|
44
|
+
/**
 * Flattens a sample (summary or full) into the plain fields rendered by the
 * summary view.
 *
 * `id`, `input`, `target` and the scorer answer are read from either kind of
 * sample. `limit`, `working_time`, `total_time` and `error` are read only
 * when `isEvalSample` recognises the value as a full sample; otherwise they
 * are `undefined`.
 *
 * @param sample - The sample summary or full sample to resolve.
 * @param sampleDescriptor - Descriptor used to look up the selected scorer's answer.
 * @returns The resolved display fields.
 */
const resolveSample = (
  sample: SampleSummary | EvalSample,
  sampleDescriptor: SamplesDescriptor,
): SampleFields => {
  // Evaluate the type guard once and reuse the narrowed value below.
  const fullSample = isEvalSample(sample) ? sample : undefined;

  const input = inputString(sample.input);
  const choices = fullSample?.choices;
  if (choices && choices.length > 0) {
    // Blank separator line, then one lettered line per choice: "A) ...", "B) ...".
    input.push("");
    choices.forEach((choice, index) => {
      input.push(`${String.fromCharCode(65 + index)}) ${choice}`);
    });
  }

  let answer = undefined;
  if (sample && sampleDescriptor) {
    answer = sampleDescriptor.selectedScorerDescriptor(sample)?.answer();
  }

  return {
    id: sample.id,
    input,
    target: sample.target,
    answer,
    limit: fullSample ? fullSample.limit?.type : undefined,
    working_time: fullSample ? fullSample.working_time : undefined,
    total_time: fullSample ? fullSample.total_time : undefined,
    error: fullSample ? fullSample.error?.message : undefined,
  };
};
|
79
|
+
|
26
80
|
/**
|
27
81
|
* Component to display a sample with relevant context and visibility control.
|
28
82
|
*/
|
29
83
|
export const SampleSummaryView: FC<SampleSummaryViewProps> = ({
|
30
84
|
parent_id,
|
31
85
|
sample,
|
32
|
-
sampleDescriptor,
|
33
86
|
}) => {
|
87
|
+
const sampleDescriptor = useSampleDescriptor();
|
88
|
+
const currentScore = useScore();
|
89
|
+
if (!sampleDescriptor) {
|
90
|
+
return undefined;
|
91
|
+
}
|
92
|
+
const fields = resolveSample(sample, sampleDescriptor);
|
93
|
+
|
34
94
|
const input =
|
35
95
|
sampleDescriptor?.messageShape.normalized.input > 0
|
36
96
|
? Math.max(0.15, sampleDescriptor.messageShape.normalized.input)
|
@@ -47,43 +107,33 @@ export const SampleSummaryView: FC<SampleSummaryViewProps> = ({
|
|
47
107
|
sampleDescriptor?.messageShape.normalized.limit > 0
|
48
108
|
? Math.max(0.15, sampleDescriptor.messageShape.normalized.limit)
|
49
109
|
: 0;
|
50
|
-
const timeSize =
|
110
|
+
const timeSize = fields.working_time || fields.total_time ? 0.15 : 0;
|
51
111
|
const idSize = Math.max(
|
52
112
|
2,
|
53
113
|
Math.min(10, sampleDescriptor?.messageShape.raw.id),
|
54
114
|
);
|
55
115
|
|
56
|
-
const scoreInput = inputString(sample.input);
|
57
|
-
if (sample.choices && sample.choices.length > 0) {
|
58
|
-
scoreInput.push("");
|
59
|
-
scoreInput.push(
|
60
|
-
...sample.choices.map((choice, index) => {
|
61
|
-
return `${String.fromCharCode(65 + index)}) ${choice}`;
|
62
|
-
}),
|
63
|
-
);
|
64
|
-
}
|
65
|
-
|
66
116
|
// The columns for the sample
|
67
117
|
const columns: SummaryColumn[] = [];
|
68
118
|
columns.push({
|
69
119
|
label: "Id",
|
70
|
-
value:
|
120
|
+
value: fields.id,
|
71
121
|
size: `${idSize}em`,
|
72
122
|
});
|
73
123
|
|
74
124
|
columns.push({
|
75
125
|
label: "Input",
|
76
|
-
value:
|
126
|
+
value: fields.input,
|
77
127
|
size: `${input}fr`,
|
78
128
|
clamp: true,
|
79
129
|
});
|
80
130
|
|
81
|
-
if (
|
131
|
+
if (fields.target) {
|
82
132
|
columns.push({
|
83
133
|
label: "Target",
|
84
134
|
value: (
|
85
135
|
<MarkdownDiv
|
86
|
-
markdown={arrayToString(
|
136
|
+
markdown={arrayToString(fields?.target || "none")}
|
87
137
|
className={clsx("no-last-para-padding", styles.target)}
|
88
138
|
/>
|
89
139
|
),
|
@@ -92,16 +142,12 @@ export const SampleSummaryView: FC<SampleSummaryViewProps> = ({
|
|
92
142
|
});
|
93
143
|
}
|
94
144
|
|
95
|
-
|
96
|
-
sample && sampleDescriptor
|
97
|
-
? sampleDescriptor.selectedScorerDescriptor(sample).answer()
|
98
|
-
: undefined;
|
99
|
-
if (fullAnswer) {
|
145
|
+
if (fields.answer) {
|
100
146
|
columns.push({
|
101
147
|
label: "Answer",
|
102
148
|
value: sample ? (
|
103
149
|
<MarkdownDiv
|
104
|
-
markdown={
|
150
|
+
markdown={fields.answer}
|
105
151
|
className={clsx("no-last-para-padding", styles.answer)}
|
106
152
|
/>
|
107
153
|
) : (
|
@@ -119,20 +165,20 @@ export const SampleSummaryView: FC<SampleSummaryViewProps> = ({
|
|
119
165
|
return `Working time: ${formatTime(working_time)}`;
|
120
166
|
};
|
121
167
|
|
122
|
-
if (
|
168
|
+
if (fields.total_time) {
|
123
169
|
columns.push({
|
124
170
|
label: "Time",
|
125
|
-
value: formatTime(
|
171
|
+
value: formatTime(fields.total_time),
|
126
172
|
size: `${timeSize}fr`,
|
127
173
|
center: true,
|
128
|
-
title: toolTip(
|
174
|
+
title: toolTip(fields.working_time),
|
129
175
|
});
|
130
176
|
}
|
131
177
|
|
132
|
-
if (
|
178
|
+
if (fields?.limit && limitSize > 0) {
|
133
179
|
columns.push({
|
134
180
|
label: "Limit",
|
135
|
-
value:
|
181
|
+
value: fields.limit,
|
136
182
|
size: `${limitSize}fr`,
|
137
183
|
center: true,
|
138
184
|
});
|
@@ -140,11 +186,11 @@ export const SampleSummaryView: FC<SampleSummaryViewProps> = ({
|
|
140
186
|
|
141
187
|
columns.push({
|
142
188
|
label: "Score",
|
143
|
-
value:
|
144
|
-
<FlatSampleError message={
|
189
|
+
value: fields.error ? (
|
190
|
+
<FlatSampleError message={fields.error} />
|
145
191
|
) : (
|
146
|
-
|
147
|
-
|
192
|
+
sampleDescriptor?.evalDescriptor.score(sample, currentScore)?.render() ||
|
193
|
+
""
|
148
194
|
),
|
149
195
|
size: "minmax(2em, 30em)",
|
150
196
|
center: true,
|
@@ -184,6 +230,7 @@ export const SampleSummaryView: FC<SampleSummaryViewProps> = ({
|
|
184
230
|
<div
|
185
231
|
key={`sample-summ-val-${idx}`}
|
186
232
|
className={clsx(
|
233
|
+
styles.value,
|
187
234
|
styles.wrap,
|
188
235
|
col.clamp ? "three-line-clamp" : undefined,
|
189
236
|
col.center ? styles.centerLabel : undefined,
|
@@ -1,47 +1,40 @@
|
|
1
1
|
import { FC } from "react";
|
2
2
|
import { Fragment } from "react/jsx-runtime";
|
3
|
-
import {
|
4
|
-
import {
|
3
|
+
import { SampleSummary } from "../api/types";
|
4
|
+
import { useScore, useScores } from "../state/hooks";
|
5
|
+
import { useStore } from "../state/store";
|
5
6
|
import { EpochFilter } from "./sample-tools/EpochFilter";
|
6
7
|
import { SampleFilter } from "./sample-tools/sample-filter/SampleFilter";
|
7
8
|
import { SelectScorer } from "./sample-tools/SelectScorer";
|
8
9
|
import { SortFilter } from "./sample-tools/SortFilter";
|
9
10
|
|
10
11
|
interface SampleToolsProps {
|
11
|
-
|
12
|
-
setEpoch: (epoch: string) => void;
|
13
|
-
epochs: number;
|
14
|
-
scoreFilter: ScoreFilter;
|
15
|
-
setScoreFilter: (filter: ScoreFilter) => void;
|
16
|
-
sort: string;
|
17
|
-
setSort: (sort: string) => void;
|
18
|
-
score?: ScoreLabel;
|
19
|
-
setScore: (score: ScoreLabel) => void;
|
20
|
-
scores: ScoreLabel[];
|
21
|
-
sampleDescriptor: SamplesDescriptor;
|
12
|
+
samples: SampleSummary[];
|
22
13
|
}
|
23
14
|
|
24
|
-
export const SampleTools: FC<SampleToolsProps> = ({
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
15
|
+
export const SampleTools: FC<SampleToolsProps> = ({ samples }) => {
|
16
|
+
const selectedLogSummary = useStore((state) => state.log.selectedLogSummary);
|
17
|
+
|
18
|
+
const filter = useStore((state) => state.log.filter);
|
19
|
+
const setFilter = useStore((state) => state.logActions.setFilter);
|
20
|
+
|
21
|
+
const scores = useScores();
|
22
|
+
const score = useScore();
|
23
|
+
const setScore = useStore((state) => state.logActions.setScore);
|
24
|
+
const epoch = useStore((state) => state.log.epoch);
|
25
|
+
const setEpoch = useStore((state) => state.logActions.setEpoch);
|
26
|
+
const sort = useStore((state) => state.log.sort);
|
27
|
+
const setSort = useStore((state) => state.logActions.setSort);
|
28
|
+
|
29
|
+
const epochs = selectedLogSummary?.eval.config.epochs || 1;
|
37
30
|
return (
|
38
31
|
<Fragment>
|
39
32
|
<SampleFilter
|
40
|
-
|
41
|
-
scoreFilter={
|
42
|
-
setScoreFilter={
|
33
|
+
samples={samples}
|
34
|
+
scoreFilter={filter}
|
35
|
+
setScoreFilter={setFilter}
|
43
36
|
/>
|
44
|
-
{scores
|
37
|
+
{scores?.length > 1 ? (
|
45
38
|
<SelectScorer scores={scores} score={score} setScore={setScore} />
|
46
39
|
) : undefined}
|
47
40
|
{epochs > 1 ? (
|
@@ -46,8 +46,9 @@ export const ChatMessage: FC<ChatMessageProps> = ({
|
|
46
46
|
indented ? styles.indented : undefined,
|
47
47
|
)}
|
48
48
|
>
|
49
|
-
<ExpandablePanel collapse={collapse} lines={30}>
|
49
|
+
<ExpandablePanel id={`${id}-message`} collapse={collapse} lines={30}>
|
50
50
|
<MessageContents
|
51
|
+
id={`${id}-contents`}
|
51
52
|
key={`${id}-contents`}
|
52
53
|
message={message}
|
53
54
|
toolMessages={toolMessages}
|
@@ -26,7 +26,7 @@ export const ChatMessageRenderer: ContentRenderer = {
|
|
26
26
|
render: (id, entry) => {
|
27
27
|
return {
|
28
28
|
rendered: (
|
29
|
-
<NavPills>
|
29
|
+
<NavPills id={`${id}-navpills`}>
|
30
30
|
<ChatSummary title="Last Turn" id={id} messages={entry.value} />
|
31
31
|
<ChatView title="All" id={id} messages={entry.value} />
|
32
32
|
</NavPills>
|
@@ -1,73 +1,65 @@
|
|
1
|
-
import { FC, RefObject,
|
1
|
+
import { FC, memo, ReactNode, RefObject, useMemo } from "react";
|
2
2
|
import { Messages } from "../../types/log";
|
3
3
|
|
4
|
-
import clsx from "clsx";
|
5
|
-
import { Virtuoso } from "react-virtuoso";
|
6
4
|
import { ChatMessageRow } from "./ChatMessageRow";
|
7
5
|
import { ResolvedMessage, resolveMessages } from "./messages";
|
8
6
|
|
9
|
-
import
|
7
|
+
import { LiveVirtualList } from "../../components/LiveVirtualList";
|
10
8
|
|
11
9
|
interface ChatViewVirtualListProps {
|
12
|
-
id
|
10
|
+
id: string;
|
11
|
+
className?: string | string[];
|
13
12
|
messages: Messages;
|
14
13
|
toolCallStyle: "compact" | "complete";
|
15
|
-
className?: string | string[];
|
16
14
|
indented: boolean;
|
17
15
|
numbered?: boolean;
|
18
|
-
scrollRef?: RefObject<
|
16
|
+
scrollRef?: RefObject<HTMLDivElement | null>;
|
17
|
+
running?: boolean;
|
19
18
|
}
|
20
19
|
|
21
20
|
/**
|
22
21
|
* Renders the ChatViewVirtualList component.
|
23
22
|
*/
|
24
|
-
export const ChatViewVirtualList: FC<ChatViewVirtualListProps> = (
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
23
|
+
export const ChatViewVirtualList: FC<ChatViewVirtualListProps> = memo(
|
24
|
+
({
|
25
|
+
id,
|
26
|
+
messages,
|
27
|
+
className,
|
28
|
+
toolCallStyle,
|
29
|
+
indented,
|
30
|
+
numbered = true,
|
31
|
+
scrollRef,
|
32
|
+
running,
|
33
|
+
}) => {
|
34
|
+
const collapsedMessages = useMemo(() => {
|
35
|
+
return resolveMessages(messages);
|
36
|
+
}, [messages]);
|
37
|
+
|
38
|
+
const renderRow = (index: number, item: ResolvedMessage): ReactNode => {
|
39
|
+
const number =
|
40
|
+
collapsedMessages.length > 1 && numbered ? index + 1 : undefined;
|
41
|
+
|
42
|
+
return (
|
43
|
+
<ChatMessageRow
|
44
|
+
parentName={id || "chat-virtual-list"}
|
45
|
+
number={number}
|
46
|
+
resolvedMessage={item}
|
47
|
+
indented={indented}
|
48
|
+
toolCallStyle={toolCallStyle}
|
49
|
+
/>
|
50
|
+
);
|
51
|
+
};
|
35
52
|
|
36
|
-
const renderRow = (item: ResolvedMessage, index: number) => {
|
37
|
-
const number =
|
38
|
-
collapsedMessages.length > 1 && numbered ? index + 1 : undefined;
|
39
53
|
return (
|
40
|
-
<
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
54
|
+
<LiveVirtualList<ResolvedMessage>
|
55
|
+
id="chat-virtual-list"
|
56
|
+
className={className}
|
57
|
+
scrollRef={scrollRef}
|
58
|
+
data={collapsedMessages}
|
59
|
+
renderRow={renderRow}
|
60
|
+
live={running}
|
61
|
+
showProgress={running}
|
46
62
|
/>
|
47
63
|
);
|
48
|
-
}
|
49
|
-
|
50
|
-
const result = (
|
51
|
-
<Virtuoso
|
52
|
-
customScrollParent={scrollRef?.current ? scrollRef.current : undefined}
|
53
|
-
style={{ height: "100%", width: "100%" }}
|
54
|
-
data={collapsedMessages}
|
55
|
-
itemContent={(index: number, data: ResolvedMessage) => {
|
56
|
-
return renderRow(data, index);
|
57
|
-
}}
|
58
|
-
increaseViewportBy={{ top: 1000, bottom: 1000 }}
|
59
|
-
overscan={{
|
60
|
-
main: 10,
|
61
|
-
reverse: 10,
|
62
|
-
}}
|
63
|
-
followOutput={followOutput}
|
64
|
-
atBottomStateChange={(atBottom: boolean) => {
|
65
|
-
setFollowOutput(atBottom);
|
66
|
-
}}
|
67
|
-
skipAnimationFrameInResizeObserver={true}
|
68
|
-
className={clsx(styles.list, className)}
|
69
|
-
/>
|
70
|
-
);
|
71
|
-
|
72
|
-
return result;
|
73
|
-
};
|
64
|
+
},
|
65
|
+
);
|
@@ -104,6 +104,9 @@ const messageRenderers: Record<string, MessageRenderer> = {
|
|
104
104
|
reasoning: {
|
105
105
|
render: (key, content, isLast) => {
|
106
106
|
const r = content as ContentReasoning;
|
107
|
+
if (!r.reasoning && !r.redacted) {
|
108
|
+
return undefined;
|
109
|
+
}
|
107
110
|
return (
|
108
111
|
<Fragment key={key}>
|
109
112
|
<div
|
@@ -115,7 +118,7 @@ const messageRenderers: Record<string, MessageRenderer> = {
|
|
115
118
|
>
|
116
119
|
Reasoning
|
117
120
|
</div>
|
118
|
-
<ExpandablePanel collapse={true}>
|
121
|
+
<ExpandablePanel id={`${key}-reasoning`} collapse={true}>
|
119
122
|
<MarkdownDiv
|
120
123
|
markdown={
|
121
124
|
r.redacted
|
@@ -13,12 +13,14 @@ import { ContentTool } from "../../types";
|
|
13
13
|
import styles from "./MessageContents.module.css";
|
14
14
|
|
15
15
|
interface MessageContentsProps {
|
16
|
+
id: string;
|
16
17
|
message: ChatMessageAssistant | ChatMessageSystem | ChatMessageUser;
|
17
18
|
toolMessages: ChatMessageTool[];
|
18
19
|
toolCallStyle: "compact" | "complete";
|
19
20
|
}
|
20
21
|
|
21
22
|
export const MessageContents: FC<MessageContentsProps> = ({
|
23
|
+
id,
|
22
24
|
message,
|
23
25
|
toolMessages,
|
24
26
|
toolCallStyle,
|
@@ -56,6 +58,7 @@ export const MessageContents: FC<MessageContentsProps> = ({
|
|
56
58
|
} else {
|
57
59
|
return (
|
58
60
|
<ToolCallView
|
61
|
+
id={`${id}-tool-call`}
|
59
62
|
key={`tool-call-${idx}`}
|
60
63
|
functionCall={functionCall}
|
61
64
|
input={input}
|
@@ -9,6 +9,7 @@ import {
|
|
9
9
|
ContentReasoning,
|
10
10
|
ContentText,
|
11
11
|
ContentVideo,
|
12
|
+
Events,
|
12
13
|
Messages,
|
13
14
|
} from "../../types/log";
|
14
15
|
|
@@ -65,6 +66,7 @@ export const resolveMessages = (messages: Messages) => {
|
|
65
66
|
}
|
66
67
|
|
67
68
|
const systemMessage: ChatMessageSystem = {
|
69
|
+
id: "sys-message-6815A84B062A",
|
68
70
|
role: "system",
|
69
71
|
content: systemContent,
|
70
72
|
source: "input",
|
@@ -123,3 +125,35 @@ const normalizeContent = (
|
|
123
125
|
return content;
|
124
126
|
}
|
125
127
|
};
|
128
|
+
|
129
|
+
/**
 * Reconstructs a message list from a sample's events.
 *
 * Every `model` event is visited in order. Each of its input messages is
 * recorded the first time its id is seen, and the message of the first output
 * choice is recorded (replacing any earlier entry with the same id). Messages
 * without an id are skipped. Results are returned in first-insertion order.
 *
 * @param runningEvents - The events to scan; non-`model` events are ignored.
 * @returns The de-duplicated messages, or an empty array when none were found.
 */
export const messagesFromEvents = (runningEvents: Events): Messages => {
  const messages: Map<
    string,
    ChatMessageSystem | ChatMessageUser | ChatMessageAssistant | ChatMessageTool
  > = new Map();

  runningEvents
    .filter((e) => e.event === "model")
    .forEach((e) => {
      for (const m of e.input) {
        const inputMessage = m as
          | ChatMessageSystem
          | ChatMessageUser
          | ChatMessageAssistant
          | ChatMessageTool;
        if (inputMessage.id && !messages.has(inputMessage.id)) {
          messages.set(inputMessage.id, inputMessage);
        }
      }

      // Guard against a model event whose output has no choices, which would
      // otherwise throw on `choices[0].message`.
      const outputMessage = e.output?.choices?.[0]?.message;
      if (outputMessage?.id) {
        messages.set(outputMessage.id, outputMessage);
      }
    });

  // `Map.prototype.entries` is a method, so `messages.entries.length` is the
  // method's arity (always 0) rather than the number of entries. Converting
  // the values directly already yields [] for an empty map, and `Array.from`
  // avoids depending on ES2025 iterator helpers (`.values().toArray()`).
  return Array.from(messages.values());
};
|
@@ -10,10 +10,12 @@ import {
|
|
10
10
|
ToolCallContent,
|
11
11
|
} from "../../../types/log";
|
12
12
|
import { MessageContent } from "../MessageContent";
|
13
|
+
import styles from "./ToolCallView.module.css";
|
13
14
|
import { ToolInput } from "./ToolInput";
|
14
15
|
import { ToolTitle } from "./ToolTitle";
|
15
16
|
|
16
17
|
interface ToolCallViewProps {
|
18
|
+
id: string;
|
17
19
|
functionCall: string;
|
18
20
|
input?: string;
|
19
21
|
highlightLanguage?: string;
|
@@ -43,6 +45,7 @@ interface ToolCallViewProps {
|
|
43
45
|
* Renders the ToolCallView component.
|
44
46
|
*/
|
45
47
|
export const ToolCallView: FC<ToolCallViewProps> = ({
|
48
|
+
id,
|
46
49
|
functionCall,
|
47
50
|
input,
|
48
51
|
highlightLanguage,
|
@@ -116,7 +119,13 @@ export const ToolCallView: FC<ToolCallViewProps> = ({
|
|
116
119
|
toolCallView={view}
|
117
120
|
/>
|
118
121
|
{hasContent ? (
|
119
|
-
<ExpandablePanel
|
122
|
+
<ExpandablePanel
|
123
|
+
id={`${id}-tool-input`}
|
124
|
+
collapse={collapse}
|
125
|
+
border={true}
|
126
|
+
lines={15}
|
127
|
+
className={styles.output}
|
128
|
+
>
|
120
129
|
<MessageContent contents={normalizedContent} />
|
121
130
|
</ExpandablePanel>
|
122
131
|
) : undefined}
|
@@ -1,46 +1,19 @@
|
|
1
1
|
import clsx from "clsx";
|
2
|
-
import {
|
3
|
-
import { FC, memo, useEffect, useRef } from "react";
|
2
|
+
import { FC } from "react";
|
4
3
|
import { MarkdownDiv } from "../../../components/MarkdownDiv";
|
5
4
|
|
5
|
+
import { usePrismHighlight } from "../../../state/hooks";
|
6
6
|
import styles from "./ToolInput.module.css";
|
7
7
|
|
8
|
-
export const useCodeHighlight = (language?: string) => {
|
9
|
-
const codeRef = useRef<HTMLElement>(null);
|
10
|
-
|
11
|
-
useEffect(() => {
|
12
|
-
if (codeRef.current && language) {
|
13
|
-
highlightElement(codeRef.current);
|
14
|
-
}
|
15
|
-
}, [language]);
|
16
|
-
|
17
|
-
return codeRef;
|
18
|
-
};
|
19
|
-
|
20
8
|
interface ToolInputProps {
|
21
9
|
highlightLanguage?: string;
|
22
10
|
contents?: string | object;
|
23
11
|
toolCallView?: { content: string };
|
24
12
|
}
|
25
|
-
export const ToolInput: FC<ToolInputProps> =
|
13
|
+
export const ToolInput: FC<ToolInputProps> = (props) => {
|
26
14
|
const { highlightLanguage, contents, toolCallView } = props;
|
27
15
|
|
28
|
-
const
|
29
|
-
const toolViewRef = useRef<HTMLDivElement>(null);
|
30
|
-
|
31
|
-
useEffect(() => {
|
32
|
-
if (toolCallView?.content && toolViewRef.current) {
|
33
|
-
requestAnimationFrame(() => {
|
34
|
-
const codeBlocks = toolViewRef.current!.querySelectorAll("pre code");
|
35
|
-
codeBlocks.forEach((block) => {
|
36
|
-
if (block.className.includes("language-")) {
|
37
|
-
block.classList.add("sourceCode");
|
38
|
-
highlightElement(block as HTMLElement);
|
39
|
-
}
|
40
|
-
});
|
41
|
-
});
|
42
|
-
}
|
43
|
-
}, [toolCallView?.content]);
|
16
|
+
const prismParentRef = usePrismHighlight(toolCallView?.content);
|
44
17
|
|
45
18
|
if (!contents && !toolCallView?.content) return null;
|
46
19
|
|
@@ -48,8 +21,8 @@ export const ToolInput: FC<ToolInputProps> = memo((props) => {
|
|
48
21
|
return (
|
49
22
|
<MarkdownDiv
|
50
23
|
markdown={toolCallView.content}
|
51
|
-
ref={
|
52
|
-
className={clsx("text-size-small", "tool-output")}
|
24
|
+
ref={prismParentRef}
|
25
|
+
className={clsx(styles.bottomPadding, "text-size-small", "tool-output")}
|
53
26
|
/>
|
54
27
|
);
|
55
28
|
}
|
@@ -58,18 +31,21 @@ export const ToolInput: FC<ToolInputProps> = memo((props) => {
|
|
58
31
|
typeof contents === "object" ? JSON.stringify(contents) : contents;
|
59
32
|
|
60
33
|
return (
|
61
|
-
<
|
62
|
-
<
|
63
|
-
|
64
|
-
className={clsx(
|
65
|
-
"source-code",
|
66
|
-
"sourceCode",
|
67
|
-
highlightLanguage ? `language-${highlightLanguage}` : undefined,
|
68
|
-
styles.outputCode,
|
69
|
-
)}
|
34
|
+
<div ref={prismParentRef}>
|
35
|
+
<pre
|
36
|
+
className={clsx("tool-output", styles.outputPre, styles.bottomMargin)}
|
70
37
|
>
|
71
|
-
|
72
|
-
|
73
|
-
|
38
|
+
<code
|
39
|
+
className={clsx(
|
40
|
+
"source-code",
|
41
|
+
"sourceCode",
|
42
|
+
highlightLanguage ? `language-${highlightLanguage}` : undefined,
|
43
|
+
styles.outputCode,
|
44
|
+
)}
|
45
|
+
>
|
46
|
+
{formattedContent}
|
47
|
+
</code>
|
48
|
+
</pre>
|
49
|
+
</div>
|
74
50
|
);
|
75
|
-
}
|
51
|
+
};
|