inspect-ai 0.3.91__py3-none-any.whl → 0.3.93__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +31 -0
- inspect_ai/_eval/eval.py +19 -2
- inspect_ai/_eval/evalset.py +4 -1
- inspect_ai/_eval/run.py +41 -0
- inspect_ai/_eval/task/generate.py +38 -44
- inspect_ai/_eval/task/log.py +26 -28
- inspect_ai/_eval/task/run.py +13 -20
- inspect_ai/_util/local_server.py +368 -0
- inspect_ai/_util/working.py +10 -4
- inspect_ai/_view/www/dist/assets/index.css +159 -146
- inspect_ai/_view/www/dist/assets/index.js +1020 -1061
- inspect_ai/_view/www/log-schema.json +4 -3
- inspect_ai/_view/www/package.json +1 -1
- inspect_ai/_view/www/src/@types/log.d.ts +3 -2
- inspect_ai/_view/www/src/app/content/MetaDataGrid.tsx +2 -2
- inspect_ai/_view/www/src/app/content/MetaDataView.module.css +1 -1
- inspect_ai/_view/www/src/app/content/MetadataGrid.module.css +1 -1
- inspect_ai/_view/www/src/app/content/RenderedContent.tsx +1 -1
- inspect_ai/_view/www/src/app/log-view/LogView.tsx +11 -0
- inspect_ai/_view/www/src/app/log-view/tabs/InfoTab.tsx +2 -9
- inspect_ai/_view/www/src/app/log-view/tabs/ModelsTab.tsx +51 -0
- inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.module.css +6 -0
- inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.tsx +143 -0
- inspect_ai/_view/www/src/app/plan/ModelCard.tsx +1 -2
- inspect_ai/_view/www/src/app/plan/PlanCard.tsx +29 -7
- inspect_ai/_view/www/src/app/plan/PlanDetailView.module.css +1 -1
- inspect_ai/_view/www/src/app/plan/PlanDetailView.tsx +1 -198
- inspect_ai/_view/www/src/app/samples/descriptor/score/NumericScoreDescriptor.tsx +2 -1
- inspect_ai/_view/www/src/app/usage/ModelUsagePanel.tsx +3 -2
- inspect_ai/_view/www/src/app/usage/TokenTable.module.css +4 -1
- inspect_ai/_view/www/src/app/usage/TokenTable.tsx +2 -2
- inspect_ai/_view/www/src/app/usage/UsageCard.module.css +8 -3
- inspect_ai/_view/www/src/app/usage/UsageCard.tsx +1 -35
- inspect_ai/_view/www/src/components/Card.css +0 -1
- inspect_ai/_view/www/src/constants.ts +2 -0
- inspect_ai/_view/www/src/utils/numeric.ts +17 -0
- inspect_ai/agent/_agent.py +3 -3
- inspect_ai/agent/_as_solver.py +20 -12
- inspect_ai/agent/_as_tool.py +15 -3
- inspect_ai/agent/_handoff.py +8 -1
- inspect_ai/agent/_run.py +11 -3
- inspect_ai/log/__init__.py +4 -0
- inspect_ai/log/_file.py +56 -0
- inspect_ai/log/_log.py +99 -0
- inspect_ai/log/_recorders/__init__.py +2 -0
- inspect_ai/log/_recorders/buffer/database.py +12 -11
- inspect_ai/log/_recorders/buffer/filestore.py +2 -2
- inspect_ai/log/_recorders/buffer/types.py +2 -2
- inspect_ai/log/_recorders/eval.py +20 -65
- inspect_ai/log/_recorders/file.py +28 -6
- inspect_ai/log/_recorders/recorder.py +7 -0
- inspect_ai/log/_recorders/types.py +1 -23
- inspect_ai/log/_samples.py +0 -8
- inspect_ai/log/_transcript.py +7 -1
- inspect_ai/log/_util.py +52 -0
- inspect_ai/model/__init__.py +5 -1
- inspect_ai/model/_call_tools.py +32 -12
- inspect_ai/model/_generate_config.py +14 -8
- inspect_ai/model/_model.py +21 -48
- inspect_ai/model/_model_output.py +25 -0
- inspect_ai/model/_openai.py +2 -0
- inspect_ai/model/_openai_responses.py +13 -1
- inspect_ai/model/_providers/anthropic.py +13 -23
- inspect_ai/model/_providers/openai_o1.py +8 -2
- inspect_ai/model/_providers/providers.py +18 -4
- inspect_ai/model/_providers/sglang.py +241 -0
- inspect_ai/model/_providers/vllm.py +207 -400
- inspect_ai/solver/__init__.py +7 -2
- inspect_ai/solver/_basic_agent.py +3 -10
- inspect_ai/solver/_task_state.py +26 -88
- inspect_ai/tool/_json_rpc_helpers.py +45 -17
- inspect_ai/tool/_mcp/_mcp.py +2 -0
- inspect_ai/tool/_mcp/_sandbox.py +8 -2
- inspect_ai/tool/_mcp/server.py +3 -1
- inspect_ai/tool/_tool_call.py +4 -1
- inspect_ai/tool/_tool_support_helpers.py +51 -12
- inspect_ai/tool/_tools/_bash_session.py +190 -68
- inspect_ai/tool/_tools/_computer/_computer.py +25 -1
- inspect_ai/tool/_tools/_text_editor.py +4 -3
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +10 -3
- inspect_ai/util/__init__.py +12 -0
- inspect_ai/util/_limit.py +393 -0
- inspect_ai/util/_limited_conversation.py +57 -0
- {inspect_ai-0.3.91.dist-info → inspect_ai-0.3.93.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.91.dist-info → inspect_ai-0.3.93.dist-info}/RECORD +90 -109
- {inspect_ai-0.3.91.dist-info → inspect_ai-0.3.93.dist-info}/WHEEL +1 -1
- inspect_ai/solver/_limit.py +0 -39
- inspect_ai/tool/_tools/_computer/_resources/Dockerfile +0 -102
- inspect_ai/tool/_tools/_computer/_resources/README.md +0 -30
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/entrypoint.sh +0 -18
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/novnc_startup.sh +0 -20
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -48
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/xfce_startup.sh +0 -13
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/xvfb_startup.sh +0 -48
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -9
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -61
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -10
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml +0 -91
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -10
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -10
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -10
- inspect_ai/tool/_tools/_computer/_resources/tool/.pylintrc +0 -8
- inspect_ai/tool/_tools/_computer/_resources/tool/.vscode/settings.json +0 -12
- inspect_ai/tool/_tools/_computer/_resources/tool/_args.py +0 -78
- inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +0 -22
- inspect_ai/tool/_tools/_computer/_resources/tool/_logger.py +0 -22
- inspect_ai/tool/_tools/_computer/_resources/tool/_run.py +0 -42
- inspect_ai/tool/_tools/_computer/_resources/tool/_tool_result.py +0 -33
- inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +0 -341
- inspect_ai/tool/_tools/_computer/_resources/tool/computer_tool.py +0 -141
- inspect_ai/tool/_tools/_computer/_resources/tool/pyproject.toml +0 -65
- inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/tool/_tools/_computer/test_args.py +0 -151
- /inspect_ai/{tool/_tools/_computer/_resources/tool/__init__.py → _view/www/src/app/log-view/tabs/ModelsTab.module.css} +0 -0
- {inspect_ai-0.3.91.dist-info → inspect_ai-0.3.93.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.91.dist-info → inspect_ai-0.3.93.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.91.dist-info → inspect_ai-0.3.93.dist-info}/top_level.txt +0 -0
@@ -4848,9 +4848,10 @@
|
|
4848
4848
|
"permission",
|
4849
4849
|
"file_not_found",
|
4850
4850
|
"is_a_directory",
|
4851
|
-
"
|
4851
|
+
"limit",
|
4852
4852
|
"approval",
|
4853
|
-
"unknown"
|
4853
|
+
"unknown",
|
4854
|
+
"output_limit"
|
4854
4855
|
],
|
4855
4856
|
"title": "Type",
|
4856
4857
|
"type": "string"
|
@@ -5392,4 +5393,4 @@
|
|
5392
5393
|
],
|
5393
5394
|
"title": "EvalLog",
|
5394
5395
|
"type": "object"
|
5395
|
-
}
|
5396
|
+
}
|
@@ -263,9 +263,10 @@ export type Type9 =
|
|
263
263
|
| "permission"
|
264
264
|
| "file_not_found"
|
265
265
|
| "is_a_directory"
|
266
|
-
| "
|
266
|
+
| "limit"
|
267
267
|
| "approval"
|
268
|
-
| "unknown"
|
268
|
+
| "unknown"
|
269
|
+
| "output_limit";
|
269
270
|
export type Message1 = string;
|
270
271
|
export type Choices = string[] | null;
|
271
272
|
export type Target = string | string[];
|
@@ -41,13 +41,13 @@ export const MetaDataGrid: FC<MetadataGridProps> = ({
|
|
41
41
|
styles.cell,
|
42
42
|
"text-style-label",
|
43
43
|
"text-style-secondary",
|
44
|
-
"text-size-
|
44
|
+
"text-size-smaller",
|
45
45
|
)}
|
46
46
|
>
|
47
47
|
{entry.name}
|
48
48
|
</div>
|
49
49
|
<div
|
50
|
-
className={clsx(styles.value, `${baseId}-value`, "text-size-
|
50
|
+
className={clsx(styles.value, `${baseId}-value`, "text-size-smaller")}
|
51
51
|
>
|
52
52
|
<RenderedContent id={id} entry={entry} />
|
53
53
|
</div>
|
@@ -54,7 +54,7 @@ export const RenderedContent: FC<RenderedContentProps> = ({
|
|
54
54
|
if (typeof entry.value === "object") {
|
55
55
|
return JSON.stringify(entry.value);
|
56
56
|
}
|
57
|
-
return String(entry.value);
|
57
|
+
return String(entry.value).trim();
|
58
58
|
} catch (e) {
|
59
59
|
return "[Unable to display value]";
|
60
60
|
}
|
@@ -17,7 +17,9 @@ import { useLogNavigation } from "../routing/navigationHooks";
|
|
17
17
|
import styles from "./LogView.module.css";
|
18
18
|
import { useInfoTabConfig } from "./tabs/InfoTab";
|
19
19
|
import { useJsonTabConfig } from "./tabs/JsonTab";
|
20
|
+
import { useModelsTab } from "./tabs/ModelsTab";
|
20
21
|
import { useSamplesTabConfig } from "./tabs/SamplesTab";
|
22
|
+
import { useTaskTabConfig } from "./tabs/TaskTab";
|
21
23
|
import { TabDescriptor } from "./types";
|
22
24
|
|
23
25
|
export const LogView: FC = () => {
|
@@ -45,7 +47,14 @@ export const LogView: FC = () => {
|
|
45
47
|
selectedLogSummary?.plan,
|
46
48
|
selectedLogSummary?.error,
|
47
49
|
selectedLogSummary?.results,
|
50
|
+
);
|
51
|
+
|
52
|
+
const taskTabConfig = useTaskTabConfig(evalSpec, selectedLogSummary?.stats);
|
53
|
+
|
54
|
+
const modelsTabConfig = useModelsTab(
|
55
|
+
evalSpec,
|
48
56
|
selectedLogSummary?.stats,
|
57
|
+
selectedLogSummary?.status,
|
49
58
|
);
|
50
59
|
|
51
60
|
const jsonTabConfig = useJsonTabConfig(
|
@@ -60,6 +69,8 @@ export const LogView: FC = () => {
|
|
60
69
|
|
61
70
|
const tabs: Record<string, TabDescriptor<any>> = {
|
62
71
|
...(samplesTabConfig ? { samples: samplesTabConfig } : {}),
|
72
|
+
task: taskTabConfig,
|
73
|
+
model: modelsTabConfig,
|
63
74
|
config: configTabConfig,
|
64
75
|
json: jsonTabConfig,
|
65
76
|
};
|
@@ -6,14 +6,12 @@ import {
|
|
6
6
|
EvalSpec,
|
7
7
|
EvalStats,
|
8
8
|
} from "../../../@types/log";
|
9
|
-
import { UsageCard } from "../../usage/UsageCard";
|
10
|
-
import { TaskErrorCard } from "../error/TaskErrorPanel";
|
11
9
|
import { SampleSummary } from "../../../client/api/types";
|
12
10
|
import { MessageBand } from "../../../components/MessageBand";
|
13
|
-
import { ModelCard } from "../../plan/ModelCard";
|
14
11
|
import { kLogViewInfoTabId } from "../../../constants";
|
15
12
|
import { useTotalSampleCount } from "../../../state/hooks";
|
16
13
|
import { PlanCard } from "../../plan/PlanCard";
|
14
|
+
import { TaskErrorCard } from "../error/TaskErrorPanel";
|
17
15
|
|
18
16
|
// Individual hook for Info tab
|
19
17
|
export const useInfoTabConfig = (
|
@@ -21,7 +19,6 @@ export const useInfoTabConfig = (
|
|
21
19
|
evalPlan: EvalPlan | undefined,
|
22
20
|
evalError: EvalError | undefined | null,
|
23
21
|
evalResults: EvalResults | undefined | null,
|
24
|
-
evalStats: EvalStats | undefined,
|
25
22
|
) => {
|
26
23
|
const totalSampleCount = useTotalSampleCount();
|
27
24
|
return useMemo(() => {
|
@@ -35,11 +32,10 @@ export const useInfoTabConfig = (
|
|
35
32
|
evalPlan,
|
36
33
|
evalError,
|
37
34
|
evalResults,
|
38
|
-
evalStats,
|
39
35
|
sampleCount: totalSampleCount,
|
40
36
|
},
|
41
37
|
};
|
42
|
-
}, [evalSpec, evalPlan, evalError, evalResults,
|
38
|
+
}, [evalSpec, evalPlan, evalError, evalResults, totalSampleCount]);
|
43
39
|
};
|
44
40
|
|
45
41
|
interface PlanTabProps {
|
@@ -57,7 +53,6 @@ export const InfoTab: FC<PlanTabProps> = ({
|
|
57
53
|
evalSpec,
|
58
54
|
evalPlan,
|
59
55
|
evalResults,
|
60
|
-
evalStats,
|
61
56
|
evalStatus,
|
62
57
|
evalError,
|
63
58
|
sampleCount,
|
@@ -85,8 +80,6 @@ export const InfoTab: FC<PlanTabProps> = ({
|
|
85
80
|
evalPlan={evalPlan}
|
86
81
|
scores={evalResults?.scores}
|
87
82
|
/>
|
88
|
-
{evalSpec ? <ModelCard evalSpec={evalSpec} /> : undefined}
|
89
|
-
{evalStatus !== "started" ? <UsageCard stats={evalStats} /> : undefined}
|
90
83
|
{evalStatus === "error" && evalError ? (
|
91
84
|
<TaskErrorCard error={evalError} />
|
92
85
|
) : undefined}
|
@@ -0,0 +1,51 @@
|
|
1
|
+
import { FC, useMemo } from "react";
|
2
|
+
import { EvalSpec, EvalStats, Status } from "../../../@types/log";
|
3
|
+
import { kLogViewModelsTabId } from "../../../constants";
|
4
|
+
import { ModelCard } from "../../plan/ModelCard";
|
5
|
+
import { UsageCard } from "../../usage/UsageCard";
|
6
|
+
|
7
|
+
// Individual hook for Info tab
|
8
|
+
export const useModelsTab = (
|
9
|
+
evalSpec: EvalSpec | undefined,
|
10
|
+
evalStats: EvalStats | undefined,
|
11
|
+
evalStatus?: Status,
|
12
|
+
) => {
|
13
|
+
return useMemo(() => {
|
14
|
+
return {
|
15
|
+
id: kLogViewModelsTabId,
|
16
|
+
label: "Models",
|
17
|
+
scrollable: true,
|
18
|
+
component: ModelTab,
|
19
|
+
componentProps: {
|
20
|
+
evalSpec,
|
21
|
+
evalStats,
|
22
|
+
evalStatus,
|
23
|
+
},
|
24
|
+
};
|
25
|
+
}, [evalSpec, evalStats]);
|
26
|
+
};
|
27
|
+
|
28
|
+
interface ModelTabProps {
|
29
|
+
evalSpec?: EvalSpec;
|
30
|
+
evalStats?: EvalStats;
|
31
|
+
evalStatus?: Status;
|
32
|
+
}
|
33
|
+
|
34
|
+
export const ModelTab: FC<ModelTabProps> = ({
|
35
|
+
evalSpec,
|
36
|
+
evalStats,
|
37
|
+
evalStatus,
|
38
|
+
}) => {
|
39
|
+
return (
|
40
|
+
<div style={{ width: "100%" }}>
|
41
|
+
<div style={{ padding: "0.5em 1em 0 1em", width: "100%" }}>
|
42
|
+
{evalSpec ? <ModelCard evalSpec={evalSpec} /> : undefined}
|
43
|
+
{evalStatus !== "started" &&
|
44
|
+
evalStats?.model_usage &&
|
45
|
+
Object.keys(evalStats.model_usage).length > 0 && (
|
46
|
+
<UsageCard stats={evalStats} />
|
47
|
+
)}
|
48
|
+
</div>
|
49
|
+
</div>
|
50
|
+
);
|
51
|
+
};
|
@@ -0,0 +1,143 @@
|
|
1
|
+
import clsx from "clsx";
|
2
|
+
import { FC, useMemo } from "react";
|
3
|
+
import { EvalSpec, EvalStats } from "../../../@types/log";
|
4
|
+
import { Card, CardBody, CardHeader } from "../../../components/Card";
|
5
|
+
import { kLogViewTaskTabId } from "../../../constants";
|
6
|
+
import { formatDuration, toTitleCase } from "../../../utils/format";
|
7
|
+
import { ghCommitUrl } from "../../../utils/git";
|
8
|
+
import { MetaDataView } from "../../content/MetaDataView";
|
9
|
+
|
10
|
+
import styles from "./TaskTab.module.css";
|
11
|
+
|
12
|
+
// Individual hook for Info tab
|
13
|
+
export const useTaskTabConfig = (
|
14
|
+
evalSpec: EvalSpec | undefined,
|
15
|
+
evalStats?: EvalStats,
|
16
|
+
) => {
|
17
|
+
return useMemo(() => {
|
18
|
+
return {
|
19
|
+
id: kLogViewTaskTabId,
|
20
|
+
label: "Task",
|
21
|
+
scrollable: true,
|
22
|
+
component: TaskTab,
|
23
|
+
componentProps: {
|
24
|
+
evalSpec,
|
25
|
+
evalStats,
|
26
|
+
},
|
27
|
+
};
|
28
|
+
}, [evalSpec, evalStats]);
|
29
|
+
};
|
30
|
+
|
31
|
+
interface TaskTabProps {
|
32
|
+
evalSpec?: EvalSpec;
|
33
|
+
evalStats?: EvalStats;
|
34
|
+
}
|
35
|
+
|
36
|
+
export const TaskTab: FC<TaskTabProps> = ({ evalSpec, evalStats }) => {
|
37
|
+
const config: Record<string, unknown> = {};
|
38
|
+
Object.entries(evalSpec?.config || {}).forEach((entry) => {
|
39
|
+
const key = entry[0];
|
40
|
+
const value = entry[1];
|
41
|
+
config[key] = value;
|
42
|
+
});
|
43
|
+
|
44
|
+
const revision = evalSpec?.revision;
|
45
|
+
const packages = evalSpec?.packages;
|
46
|
+
|
47
|
+
const taskInformation: Record<string, unknown> = {
|
48
|
+
["Task ID"]: evalSpec?.task_id,
|
49
|
+
["Run ID"]: evalSpec?.run_id,
|
50
|
+
};
|
51
|
+
|
52
|
+
if (revision) {
|
53
|
+
taskInformation[
|
54
|
+
`${revision.type ? `${toTitleCase(revision.type)} ` : ""}Revision`
|
55
|
+
] = {
|
56
|
+
_html: (
|
57
|
+
<a href={ghCommitUrl(revision.origin, revision.commit)}>
|
58
|
+
{revision.commit}
|
59
|
+
</a>
|
60
|
+
),
|
61
|
+
};
|
62
|
+
}
|
63
|
+
if (packages) {
|
64
|
+
const names = Object.keys(packages).map((key) => {
|
65
|
+
return `${key} ${packages[key]}`;
|
66
|
+
});
|
67
|
+
|
68
|
+
if (names.length === 1) {
|
69
|
+
taskInformation["Inspect"] = names[0];
|
70
|
+
} else {
|
71
|
+
taskInformation["Inspect"] = names;
|
72
|
+
}
|
73
|
+
}
|
74
|
+
if (evalSpec?.tags) {
|
75
|
+
taskInformation["tags"] = evalSpec?.tags.join(", ");
|
76
|
+
}
|
77
|
+
|
78
|
+
if (evalSpec?.sandbox) {
|
79
|
+
if (Array.isArray(evalSpec?.sandbox)) {
|
80
|
+
taskInformation["sandbox"] = evalSpec.sandbox[0];
|
81
|
+
if (evalSpec.sandbox[1]) {
|
82
|
+
taskInformation["sandbox_config"] = evalSpec.sandbox[1];
|
83
|
+
}
|
84
|
+
} else {
|
85
|
+
taskInformation["sandbox"] = evalSpec?.sandbox.type;
|
86
|
+
taskInformation["sandbox_config"] = evalSpec?.sandbox.config;
|
87
|
+
}
|
88
|
+
}
|
89
|
+
|
90
|
+
const totalDuration = formatDuration(
|
91
|
+
new Date(evalStats?.started_at || 0),
|
92
|
+
new Date(evalStats?.completed_at || 0),
|
93
|
+
);
|
94
|
+
|
95
|
+
const task_args = evalSpec?.task_args || {};
|
96
|
+
|
97
|
+
return (
|
98
|
+
<div style={{ width: "100%" }}>
|
99
|
+
<div style={{ padding: "0.5em 1em 0 1em", width: "100%" }}>
|
100
|
+
<Card>
|
101
|
+
<CardHeader label="Task Info" />
|
102
|
+
<CardBody id={"task-card-config"}>
|
103
|
+
<div className={clsx(styles.grid)}>
|
104
|
+
<MetaDataView
|
105
|
+
key={`plan-md-task`}
|
106
|
+
className={"text-size-small"}
|
107
|
+
entries={taskInformation}
|
108
|
+
tableOptions="sm"
|
109
|
+
/>
|
110
|
+
|
111
|
+
<MetaDataView
|
112
|
+
entries={{
|
113
|
+
["Start"]: new Date(
|
114
|
+
evalStats?.started_at || 0,
|
115
|
+
).toLocaleString(),
|
116
|
+
["End"]: new Date(
|
117
|
+
evalStats?.completed_at || 0,
|
118
|
+
).toLocaleString(),
|
119
|
+
["Duration"]: totalDuration,
|
120
|
+
}}
|
121
|
+
tableOptions="sm"
|
122
|
+
/>
|
123
|
+
</div>
|
124
|
+
</CardBody>
|
125
|
+
</Card>
|
126
|
+
|
127
|
+
{Object.keys(task_args).length > 0 && (
|
128
|
+
<Card>
|
129
|
+
<CardHeader label="Task Args" />
|
130
|
+
<CardBody id={"task-card-config"}>
|
131
|
+
<MetaDataView
|
132
|
+
key={`plan-md-task-args`}
|
133
|
+
className={"text-size-small"}
|
134
|
+
entries={task_args as Record<string, unknown>}
|
135
|
+
tableOptions="sm"
|
136
|
+
/>
|
137
|
+
</CardBody>
|
138
|
+
</Card>
|
139
|
+
)}
|
140
|
+
</div>
|
141
|
+
</div>
|
142
|
+
);
|
143
|
+
};
|
@@ -3,7 +3,6 @@ import { FC } from "react";
|
|
3
3
|
import clsx from "clsx";
|
4
4
|
import { EvalModelConfig, EvalSpec } from "../../@types/log";
|
5
5
|
import { Card, CardBody, CardHeader } from "../../components/Card";
|
6
|
-
import { ApplicationIcons } from "../appearance/icons";
|
7
6
|
import { MetaDataGrid } from "../content/MetaDataGrid";
|
8
7
|
import styles from "./ModelCard.module.css";
|
9
8
|
|
@@ -33,7 +32,7 @@ export const ModelCard: FC<ModelCardProps> = ({ evalSpec }) => {
|
|
33
32
|
|
34
33
|
return (
|
35
34
|
<Card>
|
36
|
-
<CardHeader
|
35
|
+
<CardHeader label="Models" />
|
37
36
|
<CardBody id={"task-model-card-body"}>
|
38
37
|
<div className={styles.container}>
|
39
38
|
{Object.keys(modelsInfo || {}).map((modelKey) => {
|
@@ -1,7 +1,7 @@
|
|
1
1
|
import { FC } from "react";
|
2
2
|
import { EvalPlan, EvalScore, EvalSpec } from "../../@types/log";
|
3
3
|
import { Card, CardBody, CardHeader } from "../../components/Card";
|
4
|
-
import {
|
4
|
+
import { MetaDataView } from "../content/MetaDataView";
|
5
5
|
import { PlanDetailView } from "./PlanDetailView";
|
6
6
|
|
7
7
|
interface PlanCardProps {
|
@@ -14,12 +14,34 @@ interface PlanCardProps {
|
|
14
14
|
* Renders the plan card
|
15
15
|
*/
|
16
16
|
export const PlanCard: FC<PlanCardProps> = ({ evalSpec, evalPlan, scores }) => {
|
17
|
+
const metadata = evalSpec?.metadata || {};
|
18
|
+
|
17
19
|
return (
|
18
|
-
|
19
|
-
<
|
20
|
-
|
21
|
-
<
|
22
|
-
|
23
|
-
|
20
|
+
<>
|
21
|
+
<Card>
|
22
|
+
<CardHeader label="Summary" />
|
23
|
+
<CardBody id={"task-plan-card-body"}>
|
24
|
+
<PlanDetailView
|
25
|
+
evaluation={evalSpec}
|
26
|
+
plan={evalPlan}
|
27
|
+
scores={scores}
|
28
|
+
/>
|
29
|
+
</CardBody>
|
30
|
+
</Card>
|
31
|
+
|
32
|
+
{Object.keys(metadata).length > 0 && (
|
33
|
+
<Card>
|
34
|
+
<CardHeader label="Metadata" />
|
35
|
+
<CardBody id={"task-metadata`"}>
|
36
|
+
<MetaDataView
|
37
|
+
key={`plan-md-metadata`}
|
38
|
+
className={"text-size-small"}
|
39
|
+
entries={metadata}
|
40
|
+
tableOptions="sm"
|
41
|
+
/>
|
42
|
+
</CardBody>
|
43
|
+
</Card>
|
44
|
+
)}
|
45
|
+
</>
|
24
46
|
);
|
25
47
|
};
|