inspect-ai 0.3.92__py3-none-any.whl → 0.3.94__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +27 -0
- inspect_ai/_display/textual/widgets/samples.py +3 -3
- inspect_ai/_display/textual/widgets/transcript.py +3 -29
- inspect_ai/_eval/eval.py +19 -2
- inspect_ai/_eval/evalset.py +4 -1
- inspect_ai/_eval/run.py +41 -0
- inspect_ai/_eval/task/generate.py +38 -44
- inspect_ai/_eval/task/log.py +26 -28
- inspect_ai/_eval/task/run.py +23 -27
- inspect_ai/_util/answer.py +26 -0
- inspect_ai/_util/constants.py +0 -1
- inspect_ai/_util/local_server.py +398 -0
- inspect_ai/_util/working.py +10 -4
- inspect_ai/_view/www/dist/assets/index.css +173 -159
- inspect_ai/_view/www/dist/assets/index.js +1417 -1142
- inspect_ai/_view/www/log-schema.json +379 -3
- inspect_ai/_view/www/package.json +1 -1
- inspect_ai/_view/www/src/@types/log.d.ts +93 -14
- inspect_ai/_view/www/src/app/content/MetaDataGrid.tsx +2 -2
- inspect_ai/_view/www/src/app/content/MetaDataView.module.css +1 -1
- inspect_ai/_view/www/src/app/content/MetadataGrid.module.css +1 -1
- inspect_ai/_view/www/src/app/content/RenderedContent.tsx +1 -1
- inspect_ai/_view/www/src/app/log-view/LogView.tsx +11 -0
- inspect_ai/_view/www/src/app/log-view/tabs/InfoTab.tsx +2 -9
- inspect_ai/_view/www/src/app/log-view/tabs/ModelsTab.tsx +51 -0
- inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.module.css +6 -0
- inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.tsx +143 -0
- inspect_ai/_view/www/src/app/plan/ModelCard.tsx +1 -2
- inspect_ai/_view/www/src/app/plan/PlanCard.tsx +29 -7
- inspect_ai/_view/www/src/app/plan/PlanDetailView.module.css +1 -1
- inspect_ai/_view/www/src/app/plan/PlanDetailView.tsx +1 -198
- inspect_ai/_view/www/src/app/samples/descriptor/score/NumericScoreDescriptor.tsx +2 -1
- inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.module.css +2 -1
- inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +174 -0
- inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +8 -8
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptView.tsx +12 -2
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +0 -3
- inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +87 -25
- inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +229 -17
- inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +11 -0
- inspect_ai/_view/www/src/app/samples/transcript/types.ts +5 -1
- inspect_ai/_view/www/src/app/usage/ModelUsagePanel.tsx +3 -2
- inspect_ai/_view/www/src/app/usage/TokenTable.module.css +4 -1
- inspect_ai/_view/www/src/app/usage/TokenTable.tsx +2 -2
- inspect_ai/_view/www/src/app/usage/UsageCard.module.css +8 -3
- inspect_ai/_view/www/src/app/usage/UsageCard.tsx +1 -35
- inspect_ai/_view/www/src/components/Card.css +0 -1
- inspect_ai/_view/www/src/constants.ts +2 -0
- inspect_ai/_view/www/src/utils/numeric.ts +17 -0
- inspect_ai/agent/_agent.py +3 -3
- inspect_ai/agent/_as_solver.py +22 -12
- inspect_ai/agent/_as_tool.py +20 -6
- inspect_ai/agent/_handoff.py +12 -1
- inspect_ai/agent/_react.py +4 -3
- inspect_ai/agent/_run.py +16 -3
- inspect_ai/agent/_types.py +9 -0
- inspect_ai/dataset/_dataset.py +6 -3
- inspect_ai/log/__init__.py +14 -0
- inspect_ai/log/_convert.py +4 -9
- inspect_ai/log/_file.py +56 -0
- inspect_ai/log/_log.py +99 -0
- inspect_ai/log/_recorders/__init__.py +2 -0
- inspect_ai/log/_recorders/buffer/database.py +12 -11
- inspect_ai/log/_recorders/buffer/filestore.py +2 -2
- inspect_ai/log/_recorders/buffer/types.py +2 -2
- inspect_ai/log/_recorders/eval.py +20 -65
- inspect_ai/log/_recorders/file.py +28 -6
- inspect_ai/log/_recorders/recorder.py +7 -0
- inspect_ai/log/_recorders/types.py +1 -23
- inspect_ai/log/_samples.py +14 -25
- inspect_ai/log/_transcript.py +84 -36
- inspect_ai/log/_tree.py +118 -0
- inspect_ai/log/_util.py +52 -0
- inspect_ai/model/__init__.py +5 -1
- inspect_ai/model/_call_tools.py +72 -44
- inspect_ai/model/_generate_config.py +14 -8
- inspect_ai/model/_model.py +66 -88
- inspect_ai/model/_model_output.py +25 -0
- inspect_ai/model/_openai.py +2 -0
- inspect_ai/model/_providers/anthropic.py +13 -23
- inspect_ai/model/_providers/hf.py +27 -1
- inspect_ai/model/_providers/openai_o1.py +8 -2
- inspect_ai/model/_providers/providers.py +18 -4
- inspect_ai/model/_providers/sglang.py +247 -0
- inspect_ai/model/_providers/vllm.py +211 -400
- inspect_ai/scorer/_choice.py +1 -2
- inspect_ai/solver/__init__.py +7 -2
- inspect_ai/solver/_basic_agent.py +3 -10
- inspect_ai/solver/_chain.py +1 -1
- inspect_ai/solver/_fork.py +1 -1
- inspect_ai/solver/_multiple_choice.py +5 -22
- inspect_ai/solver/_plan.py +2 -2
- inspect_ai/solver/_task_state.py +26 -88
- inspect_ai/solver/_transcript.py +6 -7
- inspect_ai/tool/_json_rpc_helpers.py +45 -17
- inspect_ai/tool/_mcp/_mcp.py +8 -5
- inspect_ai/tool/_mcp/_sandbox.py +8 -2
- inspect_ai/tool/_mcp/server.py +3 -1
- inspect_ai/tool/_tool_call.py +4 -1
- inspect_ai/tool/_tool_support_helpers.py +51 -12
- inspect_ai/tool/_tools/_bash_session.py +190 -68
- inspect_ai/tool/_tools/_computer/_computer.py +25 -1
- inspect_ai/tool/_tools/_execute.py +4 -1
- inspect_ai/tool/_tools/_text_editor.py +4 -3
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +10 -3
- inspect_ai/util/__init__.py +16 -0
- inspect_ai/util/_anyio.py +11 -0
- inspect_ai/util/_collect.py +50 -0
- inspect_ai/util/_limit.py +393 -0
- inspect_ai/util/_limited_conversation.py +57 -0
- inspect_ai/util/_span.py +58 -0
- inspect_ai/util/_subtask.py +27 -42
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/RECORD +120 -134
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/WHEEL +1 -1
- inspect_ai/_display/core/group.py +0 -79
- inspect_ai/solver/_limit.py +0 -39
- inspect_ai/tool/_tools/_computer/_resources/Dockerfile +0 -102
- inspect_ai/tool/_tools/_computer/_resources/README.md +0 -30
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/entrypoint.sh +0 -18
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/novnc_startup.sh +0 -20
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -48
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/xfce_startup.sh +0 -13
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/xvfb_startup.sh +0 -48
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -9
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -61
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -10
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml +0 -91
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -10
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -10
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -10
- inspect_ai/tool/_tools/_computer/_resources/tool/.pylintrc +0 -8
- inspect_ai/tool/_tools/_computer/_resources/tool/.vscode/settings.json +0 -12
- inspect_ai/tool/_tools/_computer/_resources/tool/_args.py +0 -78
- inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +0 -22
- inspect_ai/tool/_tools/_computer/_resources/tool/_logger.py +0 -22
- inspect_ai/tool/_tools/_computer/_resources/tool/_run.py +0 -42
- inspect_ai/tool/_tools/_computer/_resources/tool/_tool_result.py +0 -33
- inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +0 -341
- inspect_ai/tool/_tools/_computer/_resources/tool/computer_tool.py +0 -141
- inspect_ai/tool/_tools/_computer/_resources/tool/pyproject.toml +0 -65
- inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/tool/_tools/_computer/test_args.py +0 -151
- /inspect_ai/{tool/_tools/_computer/_resources/tool/__init__.py → _view/www/src/app/log-view/tabs/ModelsTab.module.css} +0 -0
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/top_level.txt +0 -0
@@ -3,7 +3,6 @@ import { FC } from "react";
|
|
3
3
|
import clsx from "clsx";
|
4
4
|
import { EvalModelConfig, EvalSpec } from "../../@types/log";
|
5
5
|
import { Card, CardBody, CardHeader } from "../../components/Card";
|
6
|
-
import { ApplicationIcons } from "../appearance/icons";
|
7
6
|
import { MetaDataGrid } from "../content/MetaDataGrid";
|
8
7
|
import styles from "./ModelCard.module.css";
|
9
8
|
|
@@ -33,7 +32,7 @@ export const ModelCard: FC<ModelCardProps> = ({ evalSpec }) => {
|
|
33
32
|
|
34
33
|
return (
|
35
34
|
<Card>
|
36
|
-
<CardHeader
|
35
|
+
<CardHeader label="Models" />
|
37
36
|
<CardBody id={"task-model-card-body"}>
|
38
37
|
<div className={styles.container}>
|
39
38
|
{Object.keys(modelsInfo || {}).map((modelKey) => {
|
@@ -1,7 +1,7 @@
|
|
1
1
|
import { FC } from "react";
|
2
2
|
import { EvalPlan, EvalScore, EvalSpec } from "../../@types/log";
|
3
3
|
import { Card, CardBody, CardHeader } from "../../components/Card";
|
4
|
-
import {
|
4
|
+
import { MetaDataView } from "../content/MetaDataView";
|
5
5
|
import { PlanDetailView } from "./PlanDetailView";
|
6
6
|
|
7
7
|
interface PlanCardProps {
|
@@ -14,12 +14,34 @@ interface PlanCardProps {
|
|
14
14
|
* Renders the plan card
|
15
15
|
*/
|
16
16
|
export const PlanCard: FC<PlanCardProps> = ({ evalSpec, evalPlan, scores }) => {
|
17
|
+
const metadata = evalSpec?.metadata || {};
|
18
|
+
|
17
19
|
return (
|
18
|
-
|
19
|
-
<
|
20
|
-
|
21
|
-
<
|
22
|
-
|
23
|
-
|
20
|
+
<>
|
21
|
+
<Card>
|
22
|
+
<CardHeader label="Summary" />
|
23
|
+
<CardBody id={"task-plan-card-body"}>
|
24
|
+
<PlanDetailView
|
25
|
+
evaluation={evalSpec}
|
26
|
+
plan={evalPlan}
|
27
|
+
scores={scores}
|
28
|
+
/>
|
29
|
+
</CardBody>
|
30
|
+
</Card>
|
31
|
+
|
32
|
+
{Object.keys(metadata).length > 0 && (
|
33
|
+
<Card>
|
34
|
+
<CardHeader label="Metadata" />
|
35
|
+
<CardBody id={"task-metadata`"}>
|
36
|
+
<MetaDataView
|
37
|
+
key={`plan-md-metadata`}
|
38
|
+
className={"text-size-small"}
|
39
|
+
entries={metadata}
|
40
|
+
tableOptions="sm"
|
41
|
+
/>
|
42
|
+
</CardBody>
|
43
|
+
</Card>
|
44
|
+
)}
|
45
|
+
</>
|
24
46
|
);
|
25
47
|
};
|
@@ -1,14 +1,10 @@
|
|
1
1
|
import { EvalPlan, EvalScore, EvalSpec, Params2 } from "../../@types/log";
|
2
|
-
import { toTitleCase } from "../../utils/format";
|
3
|
-
import { ghCommitUrl } from "../../utils/git";
|
4
|
-
import { MetaDataView } from "../content/MetaDataView";
|
5
2
|
import { DatasetDetailView } from "./DatasetDetailView";
|
6
3
|
import { ScorerDetailView } from "./ScorerDetailView";
|
7
4
|
import { SolversDetailView } from "./SolverDetailView";
|
8
5
|
|
9
6
|
import clsx from "clsx";
|
10
7
|
import { FC, ReactNode } from "react";
|
11
|
-
import { kModelNone } from "../../constants";
|
12
8
|
import styles from "./PlanDetailView.module.css";
|
13
9
|
|
14
10
|
interface PlanDetailViewProps {
|
@@ -26,71 +22,7 @@ export const PlanDetailView: FC<PlanDetailViewProps> = ({
|
|
26
22
|
return null;
|
27
23
|
}
|
28
24
|
|
29
|
-
// Add configuration
|
30
|
-
const config: Record<string, unknown> = {};
|
31
|
-
Object.entries(evaluation?.config || {}).forEach((entry) => {
|
32
|
-
const key = entry[0];
|
33
|
-
const value = entry[1];
|
34
|
-
config[key] = value;
|
35
|
-
});
|
36
|
-
|
37
25
|
const steps = plan?.steps;
|
38
|
-
const metadata = evaluation?.metadata;
|
39
|
-
const revision = evaluation?.revision;
|
40
|
-
const packages = evaluation?.packages;
|
41
|
-
const model_args = evaluation?.model_args;
|
42
|
-
const task_args = evaluation?.task_args;
|
43
|
-
const generate_config = plan?.config;
|
44
|
-
|
45
|
-
const taskInformation: Record<string, unknown> = {
|
46
|
-
["Task ID"]: evaluation?.task_id,
|
47
|
-
["Run ID"]: evaluation?.run_id,
|
48
|
-
};
|
49
|
-
if (revision) {
|
50
|
-
taskInformation[
|
51
|
-
`${revision.type ? `${toTitleCase(revision.type)} ` : ""}Revision`
|
52
|
-
] = {
|
53
|
-
_html: (
|
54
|
-
<a href={ghCommitUrl(revision.origin, revision.commit)}>
|
55
|
-
{revision.commit}
|
56
|
-
</a>
|
57
|
-
),
|
58
|
-
};
|
59
|
-
}
|
60
|
-
if (packages) {
|
61
|
-
const names = Object.keys(packages).map((key) => {
|
62
|
-
return `${key} ${packages[key]}`;
|
63
|
-
});
|
64
|
-
|
65
|
-
if (names.length === 1) {
|
66
|
-
taskInformation["Inspect"] = names[0];
|
67
|
-
} else {
|
68
|
-
taskInformation["Inspect"] = names;
|
69
|
-
}
|
70
|
-
}
|
71
|
-
if (evaluation.tags) {
|
72
|
-
taskInformation["Tags"] = evaluation.tags.join(", ");
|
73
|
-
}
|
74
|
-
|
75
|
-
if (evaluation?.model && evaluation.model !== kModelNone) {
|
76
|
-
config["model"] = evaluation.model;
|
77
|
-
}
|
78
|
-
|
79
|
-
if (evaluation?.model_base_url) {
|
80
|
-
config["model_base_url"] = evaluation.model_base_url;
|
81
|
-
}
|
82
|
-
|
83
|
-
if (evaluation?.sandbox) {
|
84
|
-
if (Array.isArray(evaluation?.sandbox)) {
|
85
|
-
config["sandbox"] = evaluation.sandbox[0];
|
86
|
-
if (evaluation.sandbox[1]) {
|
87
|
-
config["sandbox_config"] = evaluation.sandbox[1];
|
88
|
-
}
|
89
|
-
} else {
|
90
|
-
config["sandbox"] = evaluation?.sandbox.type;
|
91
|
-
config["sandbox_config"] = evaluation?.sandbox.config;
|
92
|
-
}
|
93
|
-
}
|
94
26
|
|
95
27
|
const taskColumns: {
|
96
28
|
title: string;
|
@@ -148,117 +80,12 @@ export const PlanDetailView: FC<PlanDetailViewProps> = ({
|
|
148
80
|
}
|
149
81
|
}
|
150
82
|
|
151
|
-
// Compute the column style for the remaining (either 1 or 2 columns wide)
|
152
|
-
const metadataColumns: {
|
153
|
-
title: string;
|
154
|
-
className: string;
|
155
|
-
contents: ReactNode;
|
156
|
-
}[] = [];
|
157
|
-
const cols = colCount(
|
158
|
-
metadataColumns,
|
159
|
-
task_args,
|
160
|
-
model_args,
|
161
|
-
config,
|
162
|
-
metadata,
|
163
|
-
);
|
164
|
-
|
165
|
-
metadataColumns.push({
|
166
|
-
title: "Task Information",
|
167
|
-
className: cols === 1 ? styles.oneCol : styles.twoCol,
|
168
|
-
contents: (
|
169
|
-
<MetaDataView
|
170
|
-
key={`plan-md-task`}
|
171
|
-
className={"text-size-small"}
|
172
|
-
entries={taskInformation}
|
173
|
-
tableOptions="sm"
|
174
|
-
/>
|
175
|
-
),
|
176
|
-
});
|
177
|
-
|
178
|
-
if (task_args && Object.keys(task_args).length > 0) {
|
179
|
-
metadataColumns.push({
|
180
|
-
title: "Task Args",
|
181
|
-
className: cols === 1 ? styles.oneCol : styles.twoCol,
|
182
|
-
contents: (
|
183
|
-
<MetaDataView
|
184
|
-
key={`plan-md-task-args`}
|
185
|
-
className={"text-size-small"}
|
186
|
-
entries={task_args as Record<string, unknown>}
|
187
|
-
tableOptions="sm"
|
188
|
-
/>
|
189
|
-
),
|
190
|
-
});
|
191
|
-
}
|
192
|
-
if (model_args && Object.keys(model_args).length > 0) {
|
193
|
-
metadataColumns.push({
|
194
|
-
title: "Model Args",
|
195
|
-
className: cols === 1 ? styles.oneCol : styles.twoCol,
|
196
|
-
contents: (
|
197
|
-
<MetaDataView
|
198
|
-
key={`plan-md-model-args`}
|
199
|
-
className={"text-size-small"}
|
200
|
-
entries={model_args as Record<string, unknown>}
|
201
|
-
tableOptions="sm"
|
202
|
-
/>
|
203
|
-
),
|
204
|
-
});
|
205
|
-
}
|
206
|
-
|
207
|
-
if (config && Object.keys(config).length > 0) {
|
208
|
-
metadataColumns.push({
|
209
|
-
title: "Configuration",
|
210
|
-
className: cols === 1 ? styles.oneCol : styles.twoCol,
|
211
|
-
contents: (
|
212
|
-
<MetaDataView
|
213
|
-
key={`plan-md-config`}
|
214
|
-
className={"text-size-small"}
|
215
|
-
entries={config}
|
216
|
-
tableOptions="sm"
|
217
|
-
/>
|
218
|
-
),
|
219
|
-
});
|
220
|
-
}
|
221
|
-
|
222
|
-
if (generate_config && Object.keys(generate_config).length > 0) {
|
223
|
-
const generate_record: Record<string, unknown> = Object.fromEntries(
|
224
|
-
Object.entries(generate_config),
|
225
|
-
);
|
226
|
-
|
227
|
-
metadataColumns.push({
|
228
|
-
title: "Generate Config",
|
229
|
-
className: cols === 1 ? styles.oneCol : styles.twoCol,
|
230
|
-
contents: (
|
231
|
-
<MetaDataView
|
232
|
-
key={`plan-md-generate-config`}
|
233
|
-
className={"text-size-small"}
|
234
|
-
entries={generate_record}
|
235
|
-
tableOptions="sm"
|
236
|
-
/>
|
237
|
-
),
|
238
|
-
});
|
239
|
-
}
|
240
|
-
|
241
|
-
if (metadata && Object.keys(metadata).length > 0) {
|
242
|
-
metadataColumns.push({
|
243
|
-
title: "Metadata",
|
244
|
-
className: cols === 1 ? styles.oneCol : styles.twoCol,
|
245
|
-
contents: (
|
246
|
-
<MetaDataView
|
247
|
-
key={`plan-md-metadata`}
|
248
|
-
className={"text-size-small"}
|
249
|
-
entries={metadata}
|
250
|
-
tableOptions="sm"
|
251
|
-
/>
|
252
|
-
),
|
253
|
-
});
|
254
|
-
}
|
255
|
-
|
256
83
|
return (
|
257
84
|
<div className={styles.container}>
|
258
85
|
<div
|
259
86
|
className={styles.grid}
|
260
87
|
style={{
|
261
|
-
gridTemplateColumns: `repeat(${taskColumns.length},
|
88
|
+
gridTemplateColumns: `repeat(${taskColumns.length}, fit-content(50%))`,
|
262
89
|
}}
|
263
90
|
>
|
264
91
|
{taskColumns.map((col) => {
|
@@ -273,34 +100,10 @@ export const PlanDetailView: FC<PlanDetailViewProps> = ({
|
|
273
100
|
);
|
274
101
|
})}
|
275
102
|
</div>
|
276
|
-
|
277
|
-
<div className={clsx(styles.row)}>
|
278
|
-
{metadataColumns.map((col) => {
|
279
|
-
return (
|
280
|
-
<PlanColumn
|
281
|
-
title={col.title}
|
282
|
-
className={col.className}
|
283
|
-
key={`plan-col-${col.title}`}
|
284
|
-
>
|
285
|
-
{col.contents}
|
286
|
-
</PlanColumn>
|
287
|
-
);
|
288
|
-
})}
|
289
|
-
</div>
|
290
103
|
</div>
|
291
104
|
);
|
292
105
|
};
|
293
106
|
|
294
|
-
const colCount = (...other: unknown[]) => {
|
295
|
-
let count = 0;
|
296
|
-
for (const o in other) {
|
297
|
-
if (o && Object.keys(o).length > 0) {
|
298
|
-
count++;
|
299
|
-
}
|
300
|
-
}
|
301
|
-
return count;
|
302
|
-
};
|
303
|
-
|
304
107
|
interface PlanColumnProps {
|
305
108
|
title: string;
|
306
109
|
className: string | string[];
|
@@ -1,6 +1,7 @@
|
|
1
1
|
import { Value2 } from "../../../../@types/log";
|
2
2
|
import { kScoreTypeNumeric } from "../../../../constants";
|
3
3
|
import { formatDecimalNoTrailingZeroes } from "../../../../utils/format";
|
4
|
+
import { compareWithNan } from "../../../../utils/numeric";
|
4
5
|
import { ScoreDescriptor } from "../types";
|
5
6
|
|
6
7
|
export const numericScoreDescriptor = (values: Value2[]): ScoreDescriptor => {
|
@@ -14,7 +15,7 @@ export const numericScoreDescriptor = (values: Value2[]): ScoreDescriptor => {
|
|
14
15
|
max: Math.max(...onlyNumeric),
|
15
16
|
compare: (a, b) => {
|
16
17
|
if (typeof a.value === "number" && typeof b.value === "number") {
|
17
|
-
return a.value
|
18
|
+
return compareWithNan(a.value, b.value);
|
18
19
|
} else {
|
19
20
|
console.warn("Comparing non-numerics using a numeric score descriptor");
|
20
21
|
return 0;
|
@@ -0,0 +1,174 @@
|
|
1
|
+
import clsx from "clsx";
|
2
|
+
import { FC } from "react";
|
3
|
+
import { SpanBeginEvent } from "../../../@types/log";
|
4
|
+
import { formatDateTime } from "../../../utils/format";
|
5
|
+
import { EventPanel } from "./event/EventPanel";
|
6
|
+
import { TranscriptComponent } from "./TranscriptView";
|
7
|
+
import { kSandboxSignalName } from "./transform/fixups";
|
8
|
+
import { EventNode } from "./types";
|
9
|
+
|
10
|
+
interface SpanEventViewProps {
|
11
|
+
id: string;
|
12
|
+
event: SpanBeginEvent;
|
13
|
+
children: EventNode[];
|
14
|
+
className?: string | string[];
|
15
|
+
}
|
16
|
+
|
17
|
+
/**
|
18
|
+
* Renders the SpanEventView component.
|
19
|
+
*/
|
20
|
+
export const SpanEventView: FC<SpanEventViewProps> = ({
|
21
|
+
id,
|
22
|
+
event,
|
23
|
+
children,
|
24
|
+
className,
|
25
|
+
}) => {
|
26
|
+
const descriptor = spanDescriptor(event);
|
27
|
+
const title =
|
28
|
+
descriptor.name ||
|
29
|
+
`${event.type ? event.type + ": " : "Step: "}${event.name}`;
|
30
|
+
const text = summarize(children);
|
31
|
+
|
32
|
+
return (
|
33
|
+
<EventPanel
|
34
|
+
id={`span-${event.name}-${id}`}
|
35
|
+
className={clsx("transcript-span", className)}
|
36
|
+
title={title}
|
37
|
+
subTitle={formatDateTime(new Date(event.timestamp))}
|
38
|
+
text={text}
|
39
|
+
collapse={descriptor.collapse}
|
40
|
+
icon={descriptor.icon}
|
41
|
+
>
|
42
|
+
<TranscriptComponent
|
43
|
+
id={`span|${event.name}|${id}`}
|
44
|
+
eventNodes={children}
|
45
|
+
/>
|
46
|
+
</EventPanel>
|
47
|
+
);
|
48
|
+
};
|
49
|
+
|
50
|
+
const summarize = (children: EventNode[]) => {
|
51
|
+
if (children.length === 0) {
|
52
|
+
return "(no events)";
|
53
|
+
}
|
54
|
+
|
55
|
+
const formatEvent = (event: string, count: number) => {
|
56
|
+
if (count === 1) {
|
57
|
+
return `${count} ${event} event`;
|
58
|
+
} else {
|
59
|
+
return `${count} ${event} events`;
|
60
|
+
}
|
61
|
+
};
|
62
|
+
|
63
|
+
// Count the types
|
64
|
+
const typeCount: Record<string, number> = {};
|
65
|
+
children.forEach((child) => {
|
66
|
+
const currentCount = typeCount[child.event.event] || 0;
|
67
|
+
typeCount[child.event.event] = currentCount + 1;
|
68
|
+
});
|
69
|
+
|
70
|
+
// Try to summarize event types
|
71
|
+
const numberOfTypes = Object.keys(typeCount).length;
|
72
|
+
if (numberOfTypes < 3) {
|
73
|
+
return Object.keys(typeCount)
|
74
|
+
.map((key) => {
|
75
|
+
return formatEvent(key, typeCount[key]);
|
76
|
+
})
|
77
|
+
.join(", ");
|
78
|
+
}
|
79
|
+
|
80
|
+
// To many types, just return the number of events
|
81
|
+
if (children.length === 1) {
|
82
|
+
return "1 event";
|
83
|
+
} else {
|
84
|
+
return `${children.length} events`;
|
85
|
+
}
|
86
|
+
};
|
87
|
+
|
88
|
+
/**
|
89
|
+
* Returns a descriptor object containing icon and style based on the event type and name.
|
90
|
+
*/
|
91
|
+
const spanDescriptor = (
|
92
|
+
event: SpanBeginEvent,
|
93
|
+
): { icon?: string; name?: string; endSpace?: boolean; collapse?: boolean } => {
|
94
|
+
const rootStepDescriptor = {
|
95
|
+
endSpace: true,
|
96
|
+
};
|
97
|
+
|
98
|
+
if (event.type === "solver") {
|
99
|
+
switch (event.name) {
|
100
|
+
case "chain_of_thought":
|
101
|
+
return {
|
102
|
+
...rootStepDescriptor,
|
103
|
+
collapse: false,
|
104
|
+
};
|
105
|
+
case "generate":
|
106
|
+
return {
|
107
|
+
...rootStepDescriptor,
|
108
|
+
collapse: false,
|
109
|
+
};
|
110
|
+
case "self_critique":
|
111
|
+
return {
|
112
|
+
...rootStepDescriptor,
|
113
|
+
collapse: false,
|
114
|
+
};
|
115
|
+
case "system_message":
|
116
|
+
return {
|
117
|
+
...rootStepDescriptor,
|
118
|
+
collapse: true,
|
119
|
+
};
|
120
|
+
case "use_tools":
|
121
|
+
return {
|
122
|
+
...rootStepDescriptor,
|
123
|
+
collapse: false,
|
124
|
+
};
|
125
|
+
case "multiple_choice":
|
126
|
+
return {
|
127
|
+
...rootStepDescriptor,
|
128
|
+
collapse: false,
|
129
|
+
};
|
130
|
+
default:
|
131
|
+
return {
|
132
|
+
...rootStepDescriptor,
|
133
|
+
collapse: false,
|
134
|
+
};
|
135
|
+
}
|
136
|
+
} else if (event.type === "scorer") {
|
137
|
+
return {
|
138
|
+
...rootStepDescriptor,
|
139
|
+
collapse: false,
|
140
|
+
};
|
141
|
+
} else if (event.event === "span_begin") {
|
142
|
+
if (event.span_id === kSandboxSignalName) {
|
143
|
+
return {
|
144
|
+
...rootStepDescriptor,
|
145
|
+
name: "Sandbox Events",
|
146
|
+
collapse: true,
|
147
|
+
};
|
148
|
+
} else if (event.name === "init") {
|
149
|
+
return {
|
150
|
+
...rootStepDescriptor,
|
151
|
+
name: "Init",
|
152
|
+
collapse: true,
|
153
|
+
};
|
154
|
+
} else {
|
155
|
+
return {
|
156
|
+
...rootStepDescriptor,
|
157
|
+
collapse: false,
|
158
|
+
};
|
159
|
+
}
|
160
|
+
} else {
|
161
|
+
switch (event.name) {
|
162
|
+
case "sample_init":
|
163
|
+
return {
|
164
|
+
...rootStepDescriptor,
|
165
|
+
name: "Sample Init",
|
166
|
+
collapse: true,
|
167
|
+
};
|
168
|
+
default:
|
169
|
+
return {
|
170
|
+
endSpace: false,
|
171
|
+
};
|
172
|
+
}
|
173
|
+
}
|
174
|
+
};
|
@@ -4,7 +4,7 @@ import { resolveToolInput } from "../chat/tools/tool";
|
|
4
4
|
import { ToolCallView } from "../chat/tools/ToolCallView";
|
5
5
|
import { ApprovalEventView } from "./ApprovalEventView";
|
6
6
|
import { EventPanel } from "./event/EventPanel";
|
7
|
-
import {
|
7
|
+
import { TranscriptComponent } from "./TranscriptView";
|
8
8
|
|
9
9
|
import clsx from "clsx";
|
10
10
|
import { FC, useMemo } from "react";
|
@@ -12,11 +12,12 @@ import { PulsingDots } from "../../../components/PulsingDots";
|
|
12
12
|
import { ChatView } from "../chat/ChatView";
|
13
13
|
import { formatTiming, formatTitle } from "./event/utils";
|
14
14
|
import styles from "./ToolEventView.module.css";
|
15
|
+
import { EventNode } from "./types";
|
15
16
|
|
16
17
|
interface ToolEventViewProps {
|
17
18
|
id: string;
|
18
19
|
event: ToolEvent;
|
19
|
-
|
20
|
+
children: EventNode[];
|
20
21
|
className?: string | string[];
|
21
22
|
}
|
22
23
|
|
@@ -26,7 +27,7 @@ interface ToolEventViewProps {
|
|
26
27
|
export const ToolEventView: FC<ToolEventViewProps> = ({
|
27
28
|
id,
|
28
29
|
event,
|
29
|
-
|
30
|
+
children,
|
30
31
|
className,
|
31
32
|
}) => {
|
32
33
|
// Extract tool input
|
@@ -92,13 +93,12 @@ export const ToolEventView: FC<ToolEventViewProps> = ({
|
|
92
93
|
</div>
|
93
94
|
) : undefined}
|
94
95
|
</div>
|
95
|
-
{
|
96
|
-
<
|
97
|
-
id={`${id}-subtask`}
|
96
|
+
{children.length > 0 ? (
|
97
|
+
<TranscriptComponent
|
98
98
|
data-name="Transcript"
|
99
|
+
id={`${id}-subtask`}
|
100
|
+
eventNodes={children}
|
99
101
|
data-default={event.failed || event.agent ? true : null}
|
100
|
-
events={event.events}
|
101
|
-
depth={depth + 1}
|
102
102
|
/>
|
103
103
|
) : (
|
104
104
|
""
|
@@ -17,6 +17,7 @@ import { ToolEventView } from "./ToolEventView";
|
|
17
17
|
import { EventNode } from "./types";
|
18
18
|
|
19
19
|
import clsx from "clsx";
|
20
|
+
import { SpanEventView } from "./SpanEventView";
|
20
21
|
import styles from "./TranscriptView.module.css";
|
21
22
|
import { TranscriptVirtualListComponent } from "./TranscriptVirtualListComponent";
|
22
23
|
import { fixupEventStream } from "./transform/fixups";
|
@@ -64,7 +65,6 @@ export const TranscriptVirtualList: FC<TranscriptVirtualListProps> = memo(
|
|
64
65
|
const eventNodes = useMemo(() => {
|
65
66
|
const resolvedEvents = fixupEventStream(events, !running);
|
66
67
|
const eventNodes = treeifyEvents(resolvedEvents, depth || 0);
|
67
|
-
|
68
68
|
return eventNodes;
|
69
69
|
}, [events, depth]);
|
70
70
|
|
@@ -201,6 +201,16 @@ export const RenderedEventNode: FC<RenderedEventNodeProps> = memo(
|
|
201
201
|
<StateEventView id={id} event={node.event} className={className} />
|
202
202
|
);
|
203
203
|
|
204
|
+
case "span_begin":
|
205
|
+
return (
|
206
|
+
<SpanEventView
|
207
|
+
id={id}
|
208
|
+
event={node.event}
|
209
|
+
children={node.children}
|
210
|
+
className={className}
|
211
|
+
/>
|
212
|
+
);
|
213
|
+
|
204
214
|
case "step":
|
205
215
|
return (
|
206
216
|
<StepEventView
|
@@ -237,7 +247,7 @@ export const RenderedEventNode: FC<RenderedEventNodeProps> = memo(
|
|
237
247
|
id={id}
|
238
248
|
event={node.event}
|
239
249
|
className={className}
|
240
|
-
|
250
|
+
children={node.children}
|
241
251
|
/>
|
242
252
|
);
|
243
253
|
|
@@ -9,7 +9,6 @@ import {
|
|
9
9
|
import { ApplicationIcons } from "../../../appearance/icons";
|
10
10
|
import { EventNavs } from "./EventNavs";
|
11
11
|
|
12
|
-
import { ProgressBar } from "../../../../components/ProgressBar";
|
13
12
|
import { useProperty } from "../../../../state/hooks";
|
14
13
|
import styles from "./EventPanel.module.css";
|
15
14
|
|
@@ -41,7 +40,6 @@ export const EventPanel: FC<EventPanelProps> = ({
|
|
41
40
|
icon,
|
42
41
|
collapse,
|
43
42
|
children,
|
44
|
-
running,
|
45
43
|
}) => {
|
46
44
|
const [isCollapsed, setCollapsed] = useProperty(id, "collapsed", {
|
47
45
|
defaultValue: !!collapse,
|
@@ -191,7 +189,6 @@ export const EventPanel: FC<EventPanelProps> = ({
|
|
191
189
|
})}
|
192
190
|
</div>
|
193
191
|
</div>
|
194
|
-
<ProgressBar animating={!!running} />
|
195
192
|
</>
|
196
193
|
);
|
197
194
|
return card;
|