inspect-ai 0.3.82__py3-none-any.whl → 0.3.84__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/__init__.py +2 -1
- inspect_ai/_display/textual/app.py +14 -3
- inspect_ai/_display/textual/display.py +4 -0
- inspect_ai/_display/textual/widgets/samples.py +9 -3
- inspect_ai/_display/textual/widgets/task_detail.py +3 -4
- inspect_ai/_display/textual/widgets/tasks.py +17 -1
- inspect_ai/_display/textual/widgets/vscode.py +48 -0
- inspect_ai/_eval/eval.py +36 -24
- inspect_ai/_eval/evalset.py +17 -18
- inspect_ai/_eval/loader.py +34 -11
- inspect_ai/_eval/run.py +8 -13
- inspect_ai/_eval/score.py +13 -3
- inspect_ai/_eval/task/generate.py +8 -9
- inspect_ai/_eval/task/log.py +2 -0
- inspect_ai/_eval/task/task.py +23 -9
- inspect_ai/_util/file.py +13 -0
- inspect_ai/_util/json.py +2 -1
- inspect_ai/_util/registry.py +1 -0
- inspect_ai/_util/vscode.py +37 -0
- inspect_ai/_view/www/App.css +6 -0
- inspect_ai/_view/www/dist/assets/index.css +304 -128
- inspect_ai/_view/www/dist/assets/index.js +47495 -27519
- inspect_ai/_view/www/log-schema.json +124 -31
- inspect_ai/_view/www/package.json +3 -0
- inspect_ai/_view/www/src/App.tsx +12 -0
- inspect_ai/_view/www/src/appearance/icons.ts +1 -0
- inspect_ai/_view/www/src/components/Card.tsx +6 -4
- inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
- inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
- inspect_ai/_view/www/src/components/LiveVirtualList.tsx +1 -1
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +113 -23
- inspect_ai/_view/www/src/components/Modal.module.css +38 -0
- inspect_ai/_view/www/src/components/Modal.tsx +77 -0
- inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
- inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
- inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +7 -0
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +7 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +11 -34
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +6 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +2 -2
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +12 -0
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +2 -0
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +2 -0
- inspect_ai/_view/www/src/samples/chat/messages.ts +3 -1
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +1 -0
- inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +9 -3
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -11
- inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +2 -1
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +7 -1
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +25 -8
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +1 -1
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +11 -22
- inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
- inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
- inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
- inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +3 -3
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +25 -4
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +29 -2
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +0 -1
- inspect_ai/_view/www/src/state/hooks.ts +5 -3
- inspect_ai/_view/www/src/state/logPolling.ts +5 -1
- inspect_ai/_view/www/src/state/logSlice.ts +10 -0
- inspect_ai/_view/www/src/state/samplePolling.ts +4 -1
- inspect_ai/_view/www/src/state/sampleSlice.ts +13 -0
- inspect_ai/_view/www/src/types/log.d.ts +34 -26
- inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
- inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +18 -16
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -0
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +68 -71
- inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
- inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +1 -1
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +18 -0
- inspect_ai/_view/www/yarn.lock +94 -1
- inspect_ai/agent/__init__.py +36 -0
- inspect_ai/agent/_agent.py +268 -0
- inspect_ai/agent/_as_solver.py +72 -0
- inspect_ai/agent/_as_tool.py +122 -0
- inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
- inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
- inspect_ai/agent/_filter.py +46 -0
- inspect_ai/agent/_handoff.py +93 -0
- inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
- inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
- inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
- inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
- inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
- inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
- inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
- inspect_ai/agent/_react.py +241 -0
- inspect_ai/agent/_run.py +36 -0
- inspect_ai/agent/_types.py +81 -0
- inspect_ai/log/_log.py +11 -2
- inspect_ai/log/_transcript.py +13 -9
- inspect_ai/model/__init__.py +7 -1
- inspect_ai/model/_call_tools.py +256 -52
- inspect_ai/model/_chat_message.py +7 -4
- inspect_ai/model/_conversation.py +13 -62
- inspect_ai/model/_display.py +85 -0
- inspect_ai/model/_model.py +113 -14
- inspect_ai/model/_model_output.py +14 -9
- inspect_ai/model/_openai.py +16 -4
- inspect_ai/model/_openai_computer_use.py +162 -0
- inspect_ai/model/_openai_responses.py +319 -165
- inspect_ai/model/_providers/anthropic.py +20 -21
- inspect_ai/model/_providers/azureai.py +24 -13
- inspect_ai/model/_providers/bedrock.py +1 -7
- inspect_ai/model/_providers/cloudflare.py +3 -3
- inspect_ai/model/_providers/goodfire.py +2 -6
- inspect_ai/model/_providers/google.py +11 -10
- inspect_ai/model/_providers/groq.py +6 -3
- inspect_ai/model/_providers/hf.py +7 -3
- inspect_ai/model/_providers/mistral.py +7 -10
- inspect_ai/model/_providers/openai.py +47 -17
- inspect_ai/model/_providers/openai_o1.py +11 -4
- inspect_ai/model/_providers/openai_responses.py +12 -14
- inspect_ai/model/_providers/providers.py +2 -2
- inspect_ai/model/_providers/together.py +12 -2
- inspect_ai/model/_providers/util/chatapi.py +7 -2
- inspect_ai/model/_providers/util/hf_handler.py +4 -2
- inspect_ai/model/_providers/util/llama31.py +4 -2
- inspect_ai/model/_providers/vertex.py +11 -9
- inspect_ai/model/_providers/vllm.py +4 -4
- inspect_ai/scorer/__init__.py +2 -0
- inspect_ai/scorer/_metrics/__init__.py +2 -0
- inspect_ai/scorer/_metrics/grouped.py +84 -0
- inspect_ai/scorer/_score.py +26 -6
- inspect_ai/solver/__init__.py +2 -2
- inspect_ai/solver/_basic_agent.py +22 -9
- inspect_ai/solver/_bridge.py +31 -0
- inspect_ai/solver/_chain.py +20 -12
- inspect_ai/solver/_fork.py +5 -1
- inspect_ai/solver/_human_agent.py +52 -0
- inspect_ai/solver/_prompt.py +3 -1
- inspect_ai/solver/_run.py +59 -0
- inspect_ai/solver/_solver.py +14 -4
- inspect_ai/solver/_task_state.py +5 -3
- inspect_ai/tool/_tool_call.py +15 -8
- inspect_ai/tool/_tool_def.py +17 -12
- inspect_ai/tool/_tool_support_helpers.py +2 -2
- inspect_ai/tool/_tool_with.py +14 -11
- inspect_ai/tool/_tools/_bash_session.py +11 -2
- inspect_ai/tool/_tools/_computer/_common.py +18 -2
- inspect_ai/tool/_tools/_computer/_computer.py +18 -2
- inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
- inspect_ai/tool/_tools/_think.py +1 -1
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +100 -61
- inspect_ai/util/__init__.py +2 -0
- inspect_ai/util/_anyio.py +27 -0
- inspect_ai/util/_sandbox/__init__.py +2 -1
- inspect_ai/util/_sandbox/context.py +32 -7
- inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
- inspect_ai/util/_sandbox/docker/compose.py +2 -2
- inspect_ai/util/_sandbox/docker/docker.py +12 -1
- inspect_ai/util/_store_model.py +30 -7
- inspect_ai/util/_subprocess.py +13 -3
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/RECORD +179 -153
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -167
- /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,38 @@
|
|
1
|
+
.container {
|
2
|
+
display: grid;
|
3
|
+
grid-template-columns:
|
4
|
+
minmax(auto, 1fr) minmax(auto, 1fr) minmax(auto, 1fr)
|
5
|
+
2fr;
|
6
|
+
column-gap: 0.75em;
|
7
|
+
}
|
8
|
+
|
9
|
+
.container .cell {
|
10
|
+
margin-bottom: 0.5em;
|
11
|
+
}
|
12
|
+
|
13
|
+
.fullWidth {
|
14
|
+
grid-column: 1 / -1;
|
15
|
+
}
|
16
|
+
|
17
|
+
.heading {
|
18
|
+
font-weight: 600;
|
19
|
+
}
|
20
|
+
|
21
|
+
.padded {
|
22
|
+
padding-bottom: 3em;
|
23
|
+
}
|
24
|
+
|
25
|
+
.separator {
|
26
|
+
height: 1px;
|
27
|
+
background-color: var(--bs-light-border-subtle);
|
28
|
+
}
|
29
|
+
|
30
|
+
.separatorPadded {
|
31
|
+
margin-top: 0.5em;
|
32
|
+
margin-bottom: 0.5em;
|
33
|
+
}
|
34
|
+
|
35
|
+
.headerSep {
|
36
|
+
margin-top: 0.1em;
|
37
|
+
margin-bottom: 0.2em;
|
38
|
+
}
|
@@ -0,0 +1,118 @@
|
|
1
|
+
import clsx from "clsx";
|
2
|
+
import { FC } from "react";
|
3
|
+
import { SampleSummary } from "../../api/types";
|
4
|
+
import { EmptyPanel } from "../../components/EmptyPanel";
|
5
|
+
import { MetaDataGrid } from "../../metadata/MetaDataGrid";
|
6
|
+
import { useEvalDescriptor } from "../../state/hooks";
|
7
|
+
import { EvalSample } from "../../types/log";
|
8
|
+
import { SampleScores } from "./SampleScores";
|
9
|
+
import styles from "./SampleScoresGrid.module.css";
|
10
|
+
|
11
|
+
interface SampleScoresGridProps {
|
12
|
+
evalSample: EvalSample;
|
13
|
+
className?: string | string[];
|
14
|
+
}
|
15
|
+
|
16
|
+
export const SampleScoresGrid: FC<SampleScoresGridProps> = ({
|
17
|
+
evalSample,
|
18
|
+
className,
|
19
|
+
}) => {
|
20
|
+
const evalDescriptor = useEvalDescriptor();
|
21
|
+
if (!evalDescriptor) {
|
22
|
+
return <EmptyPanel>No Sample Selected</EmptyPanel>;
|
23
|
+
}
|
24
|
+
return (
|
25
|
+
<div className={clsx(className, styles.container)}>
|
26
|
+
<div
|
27
|
+
className={clsx(
|
28
|
+
"text-size-smaller",
|
29
|
+
"text-style-label",
|
30
|
+
"text-style-secondary",
|
31
|
+
)}
|
32
|
+
>
|
33
|
+
Scorer
|
34
|
+
</div>
|
35
|
+
<div
|
36
|
+
className={clsx(
|
37
|
+
"text-size-smaller",
|
38
|
+
"text-style-label",
|
39
|
+
"text-style-secondary",
|
40
|
+
)}
|
41
|
+
>
|
42
|
+
Answer
|
43
|
+
</div>
|
44
|
+
<div
|
45
|
+
className={clsx(
|
46
|
+
"text-size-smaller",
|
47
|
+
"text-style-label",
|
48
|
+
"text-style-secondary",
|
49
|
+
)}
|
50
|
+
>
|
51
|
+
Score
|
52
|
+
</div>
|
53
|
+
<div
|
54
|
+
className={clsx(
|
55
|
+
"text-size-smaller",
|
56
|
+
"text-style-label",
|
57
|
+
"text-style-secondary",
|
58
|
+
)}
|
59
|
+
>
|
60
|
+
Explanation
|
61
|
+
</div>
|
62
|
+
<div
|
63
|
+
className={clsx(styles.separator, styles.fullWidth, styles.headerSep)}
|
64
|
+
></div>
|
65
|
+
|
66
|
+
{Object.keys(evalSample.scores || {}).map((scorer) => {
|
67
|
+
if (!evalSample.scores) {
|
68
|
+
return undefined;
|
69
|
+
}
|
70
|
+
const scoreData = evalSample.scores[scorer];
|
71
|
+
const explanation = scoreData.explanation || "(No Explanation)";
|
72
|
+
const answer = scoreData.answer;
|
73
|
+
let metadata = scoreData.metadata || {};
|
74
|
+
|
75
|
+
return (
|
76
|
+
<>
|
77
|
+
<div className={clsx("text-size-base", styles.cell)}>{scorer}</div>
|
78
|
+
<div className={clsx(styles.cell, "text-size-base")}>{answer}</div>
|
79
|
+
<div className={clsx(styles.cell, "text-size-base")}>
|
80
|
+
<SampleScores
|
81
|
+
sample={evalSample as any as SampleSummary}
|
82
|
+
scorer={scorer}
|
83
|
+
/>
|
84
|
+
</div>
|
85
|
+
<div className={clsx("text-size-base", styles.cell)}>
|
86
|
+
{explanation}
|
87
|
+
</div>
|
88
|
+
|
89
|
+
{Object.keys(metadata).length > 0 ? (
|
90
|
+
<>
|
91
|
+
<div
|
92
|
+
className={clsx(
|
93
|
+
"text-size-smaller",
|
94
|
+
"text-style-label",
|
95
|
+
"text-style-secondary",
|
96
|
+
styles.fullWidth,
|
97
|
+
)}
|
98
|
+
>
|
99
|
+
Metadata
|
100
|
+
</div>
|
101
|
+
<div className={clsx(styles.fullWidth)}>
|
102
|
+
<MetaDataGrid entries={metadata} />
|
103
|
+
</div>
|
104
|
+
<div
|
105
|
+
className={clsx(
|
106
|
+
styles.separator,
|
107
|
+
styles.separatorPadded,
|
108
|
+
styles.fullWidth,
|
109
|
+
)}
|
110
|
+
></div>
|
111
|
+
</>
|
112
|
+
) : undefined}
|
113
|
+
</>
|
114
|
+
);
|
115
|
+
})}
|
116
|
+
</div>
|
117
|
+
);
|
118
|
+
};
|
inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css}
RENAMED
@@ -1,6 +1,7 @@
|
|
1
1
|
.container {
|
2
2
|
margin-top: 0.5em;
|
3
3
|
padding-left: 0;
|
4
|
+
padding-right: 0;
|
4
5
|
}
|
5
6
|
|
6
7
|
.label {
|
@@ -12,7 +13,7 @@
|
|
12
13
|
}
|
13
14
|
|
14
15
|
.wordBreak {
|
15
|
-
word-break: break-
|
16
|
+
word-break: break-word;
|
16
17
|
}
|
17
18
|
|
18
19
|
.scoreTable {
|
@@ -51,3 +52,11 @@
|
|
51
52
|
.noTop {
|
52
53
|
margin-top: 0 !important;
|
53
54
|
}
|
55
|
+
|
56
|
+
.scoreCard {
|
57
|
+
padding-top: 0.5em;
|
58
|
+
}
|
59
|
+
|
60
|
+
.scores {
|
61
|
+
padding-top: 1em;
|
62
|
+
}
|
@@ -0,0 +1,78 @@
|
|
1
|
+
import clsx from "clsx";
|
2
|
+
import { Card, CardBody } from "../../components/Card";
|
3
|
+
import { MarkdownDiv } from "../../components/MarkdownDiv";
|
4
|
+
import { EvalSample } from "../../types/log";
|
5
|
+
import { inputString } from "../../utils/format";
|
6
|
+
|
7
|
+
import { FC } from "react";
|
8
|
+
import ExpandablePanel from "../../components/ExpandablePanel";
|
9
|
+
import { useEvalDescriptor } from "../../state/hooks";
|
10
|
+
import { SampleScoresGrid } from "./SampleScoresGrid";
|
11
|
+
import styles from "./SampleScoresView.module.css";
|
12
|
+
|
13
|
+
interface SampleScoresViewProps {
|
14
|
+
sample?: EvalSample;
|
15
|
+
className?: string | string[];
|
16
|
+
}
|
17
|
+
|
18
|
+
export const SampleScoresView: FC<SampleScoresViewProps> = ({
|
19
|
+
sample,
|
20
|
+
className,
|
21
|
+
}) => {
|
22
|
+
const evalDescriptor = useEvalDescriptor();
|
23
|
+
if (!evalDescriptor) {
|
24
|
+
return undefined;
|
25
|
+
}
|
26
|
+
if (!sample) {
|
27
|
+
return undefined;
|
28
|
+
}
|
29
|
+
|
30
|
+
const scoreInput = inputString(sample.input);
|
31
|
+
if (sample.choices && sample.choices.length > 0) {
|
32
|
+
scoreInput.push("");
|
33
|
+
scoreInput.push(
|
34
|
+
...sample.choices.map((choice, index) => {
|
35
|
+
return `${String.fromCharCode(65 + index)}) ${choice}`;
|
36
|
+
}),
|
37
|
+
);
|
38
|
+
}
|
39
|
+
|
40
|
+
return (
|
41
|
+
<div
|
42
|
+
className={clsx(
|
43
|
+
"container-fluid",
|
44
|
+
className,
|
45
|
+
"font-size-base",
|
46
|
+
styles.container,
|
47
|
+
)}
|
48
|
+
>
|
49
|
+
<Card className={clsx(styles.scoreCard)}>
|
50
|
+
<CardBody>
|
51
|
+
<div
|
52
|
+
className={clsx(
|
53
|
+
"text-size-small",
|
54
|
+
"text-style-label",
|
55
|
+
"text-style-secondary",
|
56
|
+
)}
|
57
|
+
>
|
58
|
+
Input
|
59
|
+
</div>
|
60
|
+
<ExpandablePanel
|
61
|
+
lines={10}
|
62
|
+
id={`sample-score-${sample.id}-${sample.epoch}`}
|
63
|
+
collapse={true}
|
64
|
+
>
|
65
|
+
<MarkdownDiv
|
66
|
+
markdown={scoreInput.join("\n")}
|
67
|
+
className={clsx(styles.wordBreak, "text-size-base")}
|
68
|
+
/>
|
69
|
+
</ExpandablePanel>
|
70
|
+
<SampleScoresGrid
|
71
|
+
evalSample={sample}
|
72
|
+
className={clsx(styles.scores)}
|
73
|
+
/>
|
74
|
+
</CardBody>
|
75
|
+
</Card>
|
76
|
+
</div>
|
77
|
+
);
|
78
|
+
};
|
@@ -1,6 +1,6 @@
|
|
1
1
|
import { FC } from "react";
|
2
2
|
import { ApplicationIcons } from "../../appearance/icons";
|
3
|
-
import { SampleLimitEvent,
|
3
|
+
import { SampleLimitEvent, Type9 } from "../../types/log";
|
4
4
|
import { EventPanel } from "./event/EventPanel";
|
5
5
|
|
6
6
|
interface SampleLimitEventViewProps {
|
@@ -17,7 +17,7 @@ export const SampleLimitEventView: FC<SampleLimitEventViewProps> = ({
|
|
17
17
|
event,
|
18
18
|
className,
|
19
19
|
}) => {
|
20
|
-
const resolve_title = (type:
|
20
|
+
const resolve_title = (type: Type9) => {
|
21
21
|
switch (type) {
|
22
22
|
case "custom":
|
23
23
|
return "Custom Limit Exceeded";
|
@@ -34,7 +34,7 @@ export const SampleLimitEventView: FC<SampleLimitEventViewProps> = ({
|
|
34
34
|
}
|
35
35
|
};
|
36
36
|
|
37
|
-
const resolve_icon = (type:
|
37
|
+
const resolve_icon = (type: Type9) => {
|
38
38
|
switch (type) {
|
39
39
|
case "custom":
|
40
40
|
return ApplicationIcons.limits.custom;
|
@@ -9,6 +9,7 @@ import { TranscriptView } from "./TranscriptView";
|
|
9
9
|
import clsx from "clsx";
|
10
10
|
import { FC, useMemo } from "react";
|
11
11
|
import { PulsingDots } from "../../components/PulsingDots";
|
12
|
+
import { ChatView } from "../chat/ChatView";
|
12
13
|
import { formatTiming, formatTitle } from "./event/utils";
|
13
14
|
import styles from "./ToolEventView.module.css";
|
14
15
|
|
@@ -34,10 +35,19 @@ export const ToolEventView: FC<ToolEventViewProps> = ({
|
|
34
35
|
[event.function, event.arguments],
|
35
36
|
);
|
36
37
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
38
|
+
const { approvalEvent, lastModelEvent } = useMemo(() => {
|
39
|
+
// Find an approval if there is one
|
40
|
+
const approvalEvent = event.events.find((e) => {
|
41
|
+
return e.event === "approval";
|
42
|
+
});
|
43
|
+
|
44
|
+
// Find a model message to render, if there is one
|
45
|
+
const lastModelEvent = [...event.events].reverse().find((e) => {
|
46
|
+
return e.event === "model";
|
47
|
+
});
|
48
|
+
|
49
|
+
return { approvalEvent, lastModelEvent };
|
50
|
+
}, [event.events]);
|
41
51
|
|
42
52
|
const title = `Tool: ${event.view?.title || event.function}`;
|
43
53
|
return (
|
@@ -58,6 +68,16 @@ export const ToolEventView: FC<ToolEventViewProps> = ({
|
|
58
68
|
mode="compact"
|
59
69
|
view={event.view ? event.view : undefined}
|
60
70
|
/>
|
71
|
+
|
72
|
+
{lastModelEvent && lastModelEvent.event === "model" ? (
|
73
|
+
<ChatView
|
74
|
+
id={`${id}-toolcall-chatmessage`}
|
75
|
+
messages={lastModelEvent.output.choices.map((m) => m.message)}
|
76
|
+
numbered={false}
|
77
|
+
toolCallStyle="compact"
|
78
|
+
/>
|
79
|
+
) : undefined}
|
80
|
+
|
61
81
|
{approvalEvent ? (
|
62
82
|
<ApprovalEventView
|
63
83
|
event={approvalEvent}
|
@@ -76,6 +96,7 @@ export const ToolEventView: FC<ToolEventViewProps> = ({
|
|
76
96
|
<TranscriptView
|
77
97
|
id={`${id}-subtask`}
|
78
98
|
data-name="Transcript"
|
99
|
+
data-default={event.failed || event.agent ? true : null}
|
79
100
|
events={event.events}
|
80
101
|
depth={depth + 1}
|
81
102
|
/>
|
@@ -1,5 +1,11 @@
|
|
1
1
|
import clsx from "clsx";
|
2
|
-
import {
|
2
|
+
import {
|
3
|
+
FC,
|
4
|
+
isValidElement,
|
5
|
+
ReactElement,
|
6
|
+
ReactNode,
|
7
|
+
useCallback,
|
8
|
+
} from "react";
|
3
9
|
import { ApplicationIcons } from "../../../appearance/icons";
|
4
10
|
import { EventNavs } from "./EventNavs";
|
5
11
|
|
@@ -49,7 +55,11 @@ export const EventPanel: FC<EventPanelProps> = ({
|
|
49
55
|
const filteredArrChildren = (
|
50
56
|
Array.isArray(children) ? children : [children]
|
51
57
|
).filter((child) => !!child);
|
52
|
-
|
58
|
+
|
59
|
+
const defaultPill = filteredArrChildren.findIndex((node) => {
|
60
|
+
return hasDataDefault(node) && node.props["data-default"];
|
61
|
+
});
|
62
|
+
const defaultPillId = defaultPill !== -1 ? pillId(defaultPill) : pillId(0);
|
53
63
|
|
54
64
|
const [selectedNav, setSelectedNav] = useProperty(id, "selectedNav", {
|
55
65
|
defaultValue: defaultPillId,
|
@@ -186,3 +196,20 @@ export const EventPanel: FC<EventPanelProps> = ({
|
|
186
196
|
);
|
187
197
|
return card;
|
188
198
|
};
|
199
|
+
|
200
|
+
// Typeguard for reading default value from pills
|
201
|
+
interface DataDefaultProps {
|
202
|
+
"data-default"?: boolean;
|
203
|
+
[key: string]: any;
|
204
|
+
}
|
205
|
+
|
206
|
+
function hasDataDefault(
|
207
|
+
node: ReactNode,
|
208
|
+
): node is ReactElement<DataDefaultProps> {
|
209
|
+
return (
|
210
|
+
isValidElement(node) &&
|
211
|
+
node.props !== null &&
|
212
|
+
typeof node.props === "object" &&
|
213
|
+
"data-default" in node.props
|
214
|
+
);
|
215
|
+
}
|
@@ -216,7 +216,6 @@ const createMessageRenderer = (name: string, role: string): ChangeType => {
|
|
216
216
|
return {
|
217
217
|
type: name,
|
218
218
|
match: (changes: JsonChange[]) => {
|
219
|
-
console.log(changes);
|
220
219
|
if (changes.length === 1) {
|
221
220
|
const change = changes[0];
|
222
221
|
if (change.op === "add" && change.path.match(/\/messages\/\d+/)) {
|
@@ -78,7 +78,9 @@ export const useScores = () => {
|
|
78
78
|
return [];
|
79
79
|
}
|
80
80
|
|
81
|
-
|
81
|
+
const result =
|
82
|
+
getAvailableScorers(selectedLogSummary, sampleSummaries) || [];
|
83
|
+
return result;
|
82
84
|
}, [selectedLogSummary, sampleSummaries]);
|
83
85
|
};
|
84
86
|
|
@@ -361,8 +363,8 @@ export const usePrismHighlight = (toolCallContent?: string) => {
|
|
361
363
|
toolCallContent.length <= kPrismRenderMaxSize
|
362
364
|
) {
|
363
365
|
requestAnimationFrame(() => {
|
364
|
-
const codeBlocks = toolViewRef.current
|
365
|
-
codeBlocks
|
366
|
+
const codeBlocks = toolViewRef.current?.querySelectorAll("pre code");
|
367
|
+
codeBlocks?.forEach((block) => {
|
366
368
|
if (block.className.includes("language-")) {
|
367
369
|
block.classList.add("sourceCode");
|
368
370
|
highlightElement(block as HTMLElement);
|
@@ -133,7 +133,11 @@ export function createLogPolling(
|
|
133
133
|
log.debug(`Stop polling running samples: ${logFileName}`);
|
134
134
|
|
135
135
|
// Clear pending summaries and refresh in one transaction
|
136
|
-
if (
|
136
|
+
if (
|
137
|
+
loadedPendingSamples ||
|
138
|
+
state.log.selectedLogSummary?.status === "started"
|
139
|
+
) {
|
140
|
+
log.debug(`Refresh log: ${logFileName}`);
|
137
141
|
await refreshLog(logFileName, true);
|
138
142
|
}
|
139
143
|
|
@@ -45,6 +45,9 @@ export interface LogSlice {
|
|
45
45
|
|
46
46
|
// Refresh the current log
|
47
47
|
refreshLog: () => Promise<void>;
|
48
|
+
|
49
|
+
// Poll the currently selected log
|
50
|
+
pollLog: () => Promise<void>;
|
48
51
|
};
|
49
52
|
}
|
50
53
|
|
@@ -175,6 +178,13 @@ export const createLogSlice = (
|
|
175
178
|
}
|
176
179
|
},
|
177
180
|
|
181
|
+
pollLog: async () => {
|
182
|
+
const currentLog = get().log.loadedLog;
|
183
|
+
if (currentLog) {
|
184
|
+
logPolling.startPolling(currentLog);
|
185
|
+
}
|
186
|
+
},
|
187
|
+
|
178
188
|
refreshLog: async () => {
|
179
189
|
const state = get();
|
180
190
|
const api = state.api;
|
@@ -123,7 +123,10 @@ export function createSamplePolling(
|
|
123
123
|
|
124
124
|
// Also fetch a fresh sample and clear the runnning Events
|
125
125
|
// (if there were ever running events)
|
126
|
-
if (
|
126
|
+
if (
|
127
|
+
state.sample.runningEvents.length > 0 ||
|
128
|
+
state.sample.sampleStatus === "streaming"
|
129
|
+
) {
|
127
130
|
try {
|
128
131
|
log.debug(
|
129
132
|
`LOADING COMPLETED SAMPLE AFTER FLUSH: ${summary.id}-${summary.epoch}`,
|
@@ -23,6 +23,11 @@ export interface SampleSlice {
|
|
23
23
|
logFile: string,
|
24
24
|
sampleSummary: SampleSummary,
|
25
25
|
) => Promise<void>;
|
26
|
+
|
27
|
+
pollSample: (
|
28
|
+
logFile: string,
|
29
|
+
sampleSummary: SampleSummary,
|
30
|
+
) => Promise<void>;
|
26
31
|
};
|
27
32
|
}
|
28
33
|
|
@@ -68,6 +73,14 @@ export const createSampleSlice = (
|
|
68
73
|
set((state) => {
|
69
74
|
state.sample.sampleError = error;
|
70
75
|
}),
|
76
|
+
pollSample: async (logFile: string, sampleSummary: SampleSummary) => {
|
77
|
+
// Poll running sample
|
78
|
+
const state = get();
|
79
|
+
if (state.log.loadedLog && state.sample.selectedSample) {
|
80
|
+
samplePolling.startPolling(logFile, sampleSummary);
|
81
|
+
state.sampleActions.setSampleStatus("streaming");
|
82
|
+
}
|
83
|
+
},
|
71
84
|
loadSample: async (logFile: string, sampleSummary: SampleSummary) => {
|
72
85
|
const sampleActions = get().sampleActions;
|
73
86
|
|