inspect-ai 0.3.82__py3-none-any.whl → 0.3.84__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/__init__.py +2 -1
- inspect_ai/_display/textual/app.py +14 -3
- inspect_ai/_display/textual/display.py +4 -0
- inspect_ai/_display/textual/widgets/samples.py +9 -3
- inspect_ai/_display/textual/widgets/task_detail.py +3 -4
- inspect_ai/_display/textual/widgets/tasks.py +17 -1
- inspect_ai/_display/textual/widgets/vscode.py +48 -0
- inspect_ai/_eval/eval.py +36 -24
- inspect_ai/_eval/evalset.py +17 -18
- inspect_ai/_eval/loader.py +34 -11
- inspect_ai/_eval/run.py +8 -13
- inspect_ai/_eval/score.py +13 -3
- inspect_ai/_eval/task/generate.py +8 -9
- inspect_ai/_eval/task/log.py +2 -0
- inspect_ai/_eval/task/task.py +23 -9
- inspect_ai/_util/file.py +13 -0
- inspect_ai/_util/json.py +2 -1
- inspect_ai/_util/registry.py +1 -0
- inspect_ai/_util/vscode.py +37 -0
- inspect_ai/_view/www/App.css +6 -0
- inspect_ai/_view/www/dist/assets/index.css +304 -128
- inspect_ai/_view/www/dist/assets/index.js +47495 -27519
- inspect_ai/_view/www/log-schema.json +124 -31
- inspect_ai/_view/www/package.json +3 -0
- inspect_ai/_view/www/src/App.tsx +12 -0
- inspect_ai/_view/www/src/appearance/icons.ts +1 -0
- inspect_ai/_view/www/src/components/Card.tsx +6 -4
- inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
- inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
- inspect_ai/_view/www/src/components/LiveVirtualList.tsx +1 -1
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +113 -23
- inspect_ai/_view/www/src/components/Modal.module.css +38 -0
- inspect_ai/_view/www/src/components/Modal.tsx +77 -0
- inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
- inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
- inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +7 -0
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +7 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +11 -34
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +6 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +2 -2
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +12 -0
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +2 -0
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +2 -0
- inspect_ai/_view/www/src/samples/chat/messages.ts +3 -1
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +1 -0
- inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +9 -3
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -11
- inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +2 -1
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +7 -1
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +25 -8
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +1 -1
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +11 -22
- inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
- inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
- inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
- inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +3 -3
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +25 -4
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +29 -2
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +0 -1
- inspect_ai/_view/www/src/state/hooks.ts +5 -3
- inspect_ai/_view/www/src/state/logPolling.ts +5 -1
- inspect_ai/_view/www/src/state/logSlice.ts +10 -0
- inspect_ai/_view/www/src/state/samplePolling.ts +4 -1
- inspect_ai/_view/www/src/state/sampleSlice.ts +13 -0
- inspect_ai/_view/www/src/types/log.d.ts +34 -26
- inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
- inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +18 -16
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -0
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +68 -71
- inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
- inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +1 -1
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +18 -0
- inspect_ai/_view/www/yarn.lock +94 -1
- inspect_ai/agent/__init__.py +36 -0
- inspect_ai/agent/_agent.py +268 -0
- inspect_ai/agent/_as_solver.py +72 -0
- inspect_ai/agent/_as_tool.py +122 -0
- inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
- inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
- inspect_ai/agent/_filter.py +46 -0
- inspect_ai/agent/_handoff.py +93 -0
- inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
- inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
- inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
- inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
- inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
- inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
- inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
- inspect_ai/agent/_react.py +241 -0
- inspect_ai/agent/_run.py +36 -0
- inspect_ai/agent/_types.py +81 -0
- inspect_ai/log/_log.py +11 -2
- inspect_ai/log/_transcript.py +13 -9
- inspect_ai/model/__init__.py +7 -1
- inspect_ai/model/_call_tools.py +256 -52
- inspect_ai/model/_chat_message.py +7 -4
- inspect_ai/model/_conversation.py +13 -62
- inspect_ai/model/_display.py +85 -0
- inspect_ai/model/_model.py +113 -14
- inspect_ai/model/_model_output.py +14 -9
- inspect_ai/model/_openai.py +16 -4
- inspect_ai/model/_openai_computer_use.py +162 -0
- inspect_ai/model/_openai_responses.py +319 -165
- inspect_ai/model/_providers/anthropic.py +20 -21
- inspect_ai/model/_providers/azureai.py +24 -13
- inspect_ai/model/_providers/bedrock.py +1 -7
- inspect_ai/model/_providers/cloudflare.py +3 -3
- inspect_ai/model/_providers/goodfire.py +2 -6
- inspect_ai/model/_providers/google.py +11 -10
- inspect_ai/model/_providers/groq.py +6 -3
- inspect_ai/model/_providers/hf.py +7 -3
- inspect_ai/model/_providers/mistral.py +7 -10
- inspect_ai/model/_providers/openai.py +47 -17
- inspect_ai/model/_providers/openai_o1.py +11 -4
- inspect_ai/model/_providers/openai_responses.py +12 -14
- inspect_ai/model/_providers/providers.py +2 -2
- inspect_ai/model/_providers/together.py +12 -2
- inspect_ai/model/_providers/util/chatapi.py +7 -2
- inspect_ai/model/_providers/util/hf_handler.py +4 -2
- inspect_ai/model/_providers/util/llama31.py +4 -2
- inspect_ai/model/_providers/vertex.py +11 -9
- inspect_ai/model/_providers/vllm.py +4 -4
- inspect_ai/scorer/__init__.py +2 -0
- inspect_ai/scorer/_metrics/__init__.py +2 -0
- inspect_ai/scorer/_metrics/grouped.py +84 -0
- inspect_ai/scorer/_score.py +26 -6
- inspect_ai/solver/__init__.py +2 -2
- inspect_ai/solver/_basic_agent.py +22 -9
- inspect_ai/solver/_bridge.py +31 -0
- inspect_ai/solver/_chain.py +20 -12
- inspect_ai/solver/_fork.py +5 -1
- inspect_ai/solver/_human_agent.py +52 -0
- inspect_ai/solver/_prompt.py +3 -1
- inspect_ai/solver/_run.py +59 -0
- inspect_ai/solver/_solver.py +14 -4
- inspect_ai/solver/_task_state.py +5 -3
- inspect_ai/tool/_tool_call.py +15 -8
- inspect_ai/tool/_tool_def.py +17 -12
- inspect_ai/tool/_tool_support_helpers.py +2 -2
- inspect_ai/tool/_tool_with.py +14 -11
- inspect_ai/tool/_tools/_bash_session.py +11 -2
- inspect_ai/tool/_tools/_computer/_common.py +18 -2
- inspect_ai/tool/_tools/_computer/_computer.py +18 -2
- inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
- inspect_ai/tool/_tools/_think.py +1 -1
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +100 -61
- inspect_ai/util/__init__.py +2 -0
- inspect_ai/util/_anyio.py +27 -0
- inspect_ai/util/_sandbox/__init__.py +2 -1
- inspect_ai/util/_sandbox/context.py +32 -7
- inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
- inspect_ai/util/_sandbox/docker/compose.py +2 -2
- inspect_ai/util/_sandbox/docker/docker.py +12 -1
- inspect_ai/util/_store_model.py +30 -7
- inspect_ai/util/_subprocess.py +13 -3
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/RECORD +179 -153
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -167
- /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/top_level.txt +0 -0
@@ -1,167 +0,0 @@
|
|
1
|
-
import clsx from "clsx";
|
2
|
-
import { Card, CardBody, CardHeader } from "../../components/Card";
|
3
|
-
import { MarkdownDiv } from "../../components/MarkdownDiv";
|
4
|
-
import { MetaDataGrid } from "../../metadata/MetaDataGrid";
|
5
|
-
import { EvalSample } from "../../types/log";
|
6
|
-
import { arrayToString, inputString } from "../../utils/format";
|
7
|
-
import { SampleScores } from "./SampleScores";
|
8
|
-
|
9
|
-
import { FC } from "react";
|
10
|
-
import { SampleSummary } from "../../api/types";
|
11
|
-
import { useEvalDescriptor } from "../../state/hooks";
|
12
|
-
import styles from "./SampleScoreView.module.css";
|
13
|
-
|
14
|
-
interface SampleScoreViewProps {
|
15
|
-
sample: EvalSample;
|
16
|
-
scorer: string;
|
17
|
-
className?: string | string[];
|
18
|
-
}
|
19
|
-
|
20
|
-
export const SampleScoreView: FC<SampleScoreViewProps> = ({
|
21
|
-
sample,
|
22
|
-
className,
|
23
|
-
scorer,
|
24
|
-
}) => {
|
25
|
-
const evalDescriptor = useEvalDescriptor();
|
26
|
-
if (!evalDescriptor) {
|
27
|
-
return null;
|
28
|
-
}
|
29
|
-
|
30
|
-
const scoreInput = inputString(sample.input);
|
31
|
-
if (sample.choices && sample.choices.length > 0) {
|
32
|
-
scoreInput.push("");
|
33
|
-
scoreInput.push(
|
34
|
-
...sample.choices.map((choice, index) => {
|
35
|
-
return `${String.fromCharCode(65 + index)}) ${choice}`;
|
36
|
-
}),
|
37
|
-
);
|
38
|
-
}
|
39
|
-
|
40
|
-
const scorerDescriptor = evalDescriptor.scorerDescriptor(sample, {
|
41
|
-
scorer,
|
42
|
-
name: scorer,
|
43
|
-
});
|
44
|
-
const explanation = scorerDescriptor.explanation() || "(No Explanation)";
|
45
|
-
const answer = scorerDescriptor.answer();
|
46
|
-
const metadata = scorerDescriptor.metadata();
|
47
|
-
|
48
|
-
return (
|
49
|
-
<div
|
50
|
-
className={clsx(
|
51
|
-
"container-fluid",
|
52
|
-
className,
|
53
|
-
"font-size-base",
|
54
|
-
styles.container,
|
55
|
-
)}
|
56
|
-
>
|
57
|
-
<Card>
|
58
|
-
<CardHeader label="Score" />
|
59
|
-
<CardBody>
|
60
|
-
<div>
|
61
|
-
<div
|
62
|
-
className={clsx(
|
63
|
-
styles.label,
|
64
|
-
"text-style-label",
|
65
|
-
"text-style-secondary",
|
66
|
-
)}
|
67
|
-
>
|
68
|
-
Input
|
69
|
-
</div>
|
70
|
-
<div>
|
71
|
-
<MarkdownDiv
|
72
|
-
markdown={scoreInput.join("\n")}
|
73
|
-
className={styles.wordBreak}
|
74
|
-
/>
|
75
|
-
</div>
|
76
|
-
</div>
|
77
|
-
|
78
|
-
<table className={clsx("table", styles.scoreTable)}>
|
79
|
-
<thead className={styles.bottomBorder}>
|
80
|
-
<tr>
|
81
|
-
<th
|
82
|
-
className={clsx(
|
83
|
-
styles.label,
|
84
|
-
"text-style-label",
|
85
|
-
"text-style-secondary",
|
86
|
-
)}
|
87
|
-
>
|
88
|
-
Target
|
89
|
-
</th>
|
90
|
-
<th
|
91
|
-
className={clsx(
|
92
|
-
styles.label,
|
93
|
-
"text-style-label",
|
94
|
-
"text-style-secondary",
|
95
|
-
)}
|
96
|
-
>
|
97
|
-
Answer
|
98
|
-
</th>
|
99
|
-
<th
|
100
|
-
className={clsx(
|
101
|
-
styles.label,
|
102
|
-
"text-style-label",
|
103
|
-
"text-style-secondary",
|
104
|
-
styles.headerScore,
|
105
|
-
)}
|
106
|
-
>
|
107
|
-
Score
|
108
|
-
</th>
|
109
|
-
</tr>
|
110
|
-
</thead>
|
111
|
-
<tbody className={styles.bottomBorder}>
|
112
|
-
<tr>
|
113
|
-
<td className={styles.targetValue}>
|
114
|
-
<MarkdownDiv
|
115
|
-
markdown={arrayToString(
|
116
|
-
arrayToString(sample?.target || "none"),
|
117
|
-
)}
|
118
|
-
className={clsx("no-last-para-padding", styles.noLeft)}
|
119
|
-
/>
|
120
|
-
</td>
|
121
|
-
<td className={clsx(styles.answerValue)}>
|
122
|
-
<MarkdownDiv
|
123
|
-
className={clsx("no-last-para-padding", styles.noLeft)}
|
124
|
-
markdown={answer}
|
125
|
-
/>
|
126
|
-
</td>
|
127
|
-
<td className={clsx(styles.scoreValue)}>
|
128
|
-
<SampleScores
|
129
|
-
sample={sample as any as SampleSummary}
|
130
|
-
scorer={scorer}
|
131
|
-
/>
|
132
|
-
</td>
|
133
|
-
</tr>
|
134
|
-
</tbody>
|
135
|
-
</table>
|
136
|
-
</CardBody>
|
137
|
-
</Card>
|
138
|
-
{explanation && explanation !== answer ? (
|
139
|
-
<Card>
|
140
|
-
<CardHeader label="Explanation" />
|
141
|
-
<CardBody>
|
142
|
-
<MarkdownDiv
|
143
|
-
markdown={arrayToString(explanation)}
|
144
|
-
className={clsx("no-last-para-padding", styles.noLeft)}
|
145
|
-
/>
|
146
|
-
</CardBody>
|
147
|
-
</Card>
|
148
|
-
) : (
|
149
|
-
""
|
150
|
-
)}
|
151
|
-
{metadata && Object.keys(metadata).length > 0 ? (
|
152
|
-
<Card>
|
153
|
-
<CardHeader label="Metadata" />
|
154
|
-
<CardBody>
|
155
|
-
<MetaDataGrid
|
156
|
-
id="task-sample-score-metadata"
|
157
|
-
className={clsx("tab-pane", styles.noTop)}
|
158
|
-
entries={metadata}
|
159
|
-
/>
|
160
|
-
</CardBody>
|
161
|
-
</Card>
|
162
|
-
) : (
|
163
|
-
""
|
164
|
-
)}
|
165
|
-
</div>
|
166
|
-
);
|
167
|
-
};
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|