inspect-ai 0.3.103__py3-none-any.whl → 0.3.104__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/common.py +2 -1
- inspect_ai/_cli/eval.py +2 -2
- inspect_ai/_display/core/active.py +3 -0
- inspect_ai/_display/core/config.py +1 -0
- inspect_ai/_display/core/panel.py +21 -13
- inspect_ai/_display/core/results.py +3 -7
- inspect_ai/_display/core/rich.py +3 -5
- inspect_ai/_display/log/__init__.py +0 -0
- inspect_ai/_display/log/display.py +173 -0
- inspect_ai/_display/plain/display.py +2 -2
- inspect_ai/_display/rich/display.py +2 -4
- inspect_ai/_display/textual/app.py +1 -6
- inspect_ai/_display/textual/widgets/task_detail.py +3 -14
- inspect_ai/_display/textual/widgets/tasks.py +1 -1
- inspect_ai/_eval/eval.py +1 -1
- inspect_ai/_eval/evalset.py +2 -2
- inspect_ai/_eval/registry.py +6 -1
- inspect_ai/_eval/run.py +5 -1
- inspect_ai/_eval/task/constants.py +1 -0
- inspect_ai/_eval/task/log.py +2 -0
- inspect_ai/_eval/task/run.py +1 -1
- inspect_ai/_util/citation.py +88 -0
- inspect_ai/_util/content.py +24 -2
- inspect_ai/_util/json.py +17 -2
- inspect_ai/_util/registry.py +19 -4
- inspect_ai/_view/schema.py +0 -6
- inspect_ai/_view/www/dist/assets/index.css +82 -24
- inspect_ai/_view/www/dist/assets/index.js +10124 -9808
- inspect_ai/_view/www/log-schema.json +418 -1
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
- inspect_ai/_view/www/node_modules/katex/src/fonts/generate_fonts.py +58 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/extract_tfms.py +114 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/extract_ttfs.py +122 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/format_json.py +28 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/parse_tfm.py +211 -0
- inspect_ai/_view/www/package.json +2 -2
- inspect_ai/_view/www/src/@types/log.d.ts +140 -39
- inspect_ai/_view/www/src/app/content/RecordTree.tsx +13 -0
- inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -1
- inspect_ai/_view/www/src/app/routing/logNavigation.ts +31 -0
- inspect_ai/_view/www/src/app/routing/{navigationHooks.ts → sampleNavigation.ts} +39 -86
- inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/chat/MessageCitations.module.css +16 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageCitations.tsx +63 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +6 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +174 -25
- inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +21 -3
- inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.module.css +7 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.tsx +111 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.module.css +10 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.tsx +14 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.module.css +19 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.tsx +49 -0
- inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -1
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +12 -2
- inspect_ai/_view/www/src/app/samples/chat/types.ts +4 -0
- inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/sampleLimit.ts +2 -2
- inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +4 -4
- inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +1 -1
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +15 -2
- inspect_ai/_view/www/src/tests/README.md +2 -2
- inspect_ai/_view/www/src/utils/git.ts +3 -1
- inspect_ai/_view/www/src/utils/html.ts +6 -0
- inspect_ai/agent/_handoff.py +3 -3
- inspect_ai/log/_condense.py +5 -0
- inspect_ai/log/_file.py +4 -1
- inspect_ai/log/_log.py +9 -4
- inspect_ai/log/_recorders/json.py +4 -2
- inspect_ai/log/_util.py +2 -0
- inspect_ai/model/__init__.py +14 -0
- inspect_ai/model/_call_tools.py +13 -4
- inspect_ai/model/_chat_message.py +3 -0
- inspect_ai/model/_openai_responses.py +80 -34
- inspect_ai/model/_providers/_anthropic_citations.py +158 -0
- inspect_ai/model/_providers/_google_citations.py +100 -0
- inspect_ai/model/_providers/anthropic.py +196 -34
- inspect_ai/model/_providers/google.py +94 -22
- inspect_ai/model/_providers/mistral.py +20 -7
- inspect_ai/model/_providers/openai.py +11 -10
- inspect_ai/model/_providers/openai_compatible.py +3 -2
- inspect_ai/model/_providers/openai_responses.py +2 -5
- inspect_ai/model/_providers/perplexity.py +123 -0
- inspect_ai/model/_providers/providers.py +13 -2
- inspect_ai/model/_providers/vertex.py +3 -0
- inspect_ai/model/_trim.py +5 -0
- inspect_ai/tool/__init__.py +14 -0
- inspect_ai/tool/_mcp/_mcp.py +5 -2
- inspect_ai/tool/_mcp/sampling.py +19 -3
- inspect_ai/tool/_mcp/server.py +1 -1
- inspect_ai/tool/_tool.py +10 -1
- inspect_ai/tool/_tools/_web_search/_base_http_provider.py +104 -0
- inspect_ai/tool/_tools/_web_search/_exa.py +78 -0
- inspect_ai/tool/_tools/_web_search/_google.py +22 -25
- inspect_ai/tool/_tools/_web_search/_tavily.py +47 -65
- inspect_ai/tool/_tools/_web_search/_web_search.py +83 -36
- inspect_ai/tool/_tools/_web_search/_web_search_provider.py +7 -0
- inspect_ai/util/_display.py +11 -2
- inspect_ai/util/_sandbox/docker/compose.py +2 -2
- inspect_ai/util/_span.py +12 -1
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/RECORD +110 -86
- /inspect_ai/model/{_openai_computer_use.py → _providers/_openai_computer_use.py} +0 -0
- /inspect_ai/model/{_openai_web_search.py → _providers/_openai_web_search.py} +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.104.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,111 @@
|
|
1
|
+
import { FC, ReactNode } from "react";
|
2
|
+
import { WebSearch } from "./WebSearch";
|
3
|
+
|
4
|
+
import clsx from "clsx";
|
5
|
+
import { ContentData } from "../../../../@types/log";
|
6
|
+
import { RecordTree } from "../../../content/RecordTree";
|
7
|
+
import styles from "./ContentDataView.module.css";
|
8
|
+
import { WebSearchContentData, WebSearchResults } from "./WebSearchResults";
|
9
|
+
|
10
|
+
export interface ContentDataProps {
|
11
|
+
id: string;
|
12
|
+
contentData: ContentData;
|
13
|
+
}
|
14
|
+
|
15
|
+
interface RenderableData {
|
16
|
+
type: string;
|
17
|
+
name?: string;
|
18
|
+
[key: string]: any;
|
19
|
+
}
|
20
|
+
|
21
|
+
export const ContentDataView: FC<ContentDataProps> = ({ id, contentData }) => {
|
22
|
+
const renderableData = contentData.data as RenderableData;
|
23
|
+
|
24
|
+
const renderer = contentDataRenderers.find((r) =>
|
25
|
+
r.canRender(renderableData),
|
26
|
+
);
|
27
|
+
|
28
|
+
if (!renderer) {
|
29
|
+
const { encrypted_content, ...record } = renderableData;
|
30
|
+
return (
|
31
|
+
<div className={clsx(styles.contentData)}>
|
32
|
+
<RecordTree
|
33
|
+
id={`${id}-tree`}
|
34
|
+
record={record}
|
35
|
+
className={clsx(styles.data)}
|
36
|
+
defaultExpandLevel={0}
|
37
|
+
/>
|
38
|
+
</div>
|
39
|
+
);
|
40
|
+
}
|
41
|
+
|
42
|
+
return (
|
43
|
+
<div className={clsx(styles.contentData)}>
|
44
|
+
{renderer.render(renderableData)}
|
45
|
+
</div>
|
46
|
+
);
|
47
|
+
};
|
48
|
+
|
49
|
+
// The following handles rendering of the content data based on its type
|
50
|
+
// and name, allowing for different renderers to be used for different types of content data.
|
51
|
+
|
52
|
+
interface ContentDataRenderer {
|
53
|
+
name: string;
|
54
|
+
canRender: (data: RenderableData) => boolean;
|
55
|
+
render: (data: RenderableData) => ReactNode;
|
56
|
+
}
|
57
|
+
|
58
|
+
const webSearchServerToolRenderer: ContentDataRenderer = {
|
59
|
+
name: "WebSearch",
|
60
|
+
canRender: (data: RenderableData) => {
|
61
|
+
return data.type === "server_tool_use" && data.name === "web_search";
|
62
|
+
},
|
63
|
+
render: (data: RenderableData): ReactNode => {
|
64
|
+
return <WebSearch query={data.input.query} />;
|
65
|
+
},
|
66
|
+
};
|
67
|
+
|
68
|
+
const webSearchResultsServerToolRenderer: ContentDataRenderer = {
|
69
|
+
name: "WebSearchResults",
|
70
|
+
canRender: (data: RenderableData) => {
|
71
|
+
return (
|
72
|
+
data.type === "web_search_tool_result" && Array.isArray(data.content)
|
73
|
+
);
|
74
|
+
},
|
75
|
+
render: (data: RenderableData): ReactNode => {
|
76
|
+
const results: WebSearchContentData[] =
|
77
|
+
data.content as WebSearchContentData[];
|
78
|
+
return <WebSearchResults results={results} />;
|
79
|
+
},
|
80
|
+
};
|
81
|
+
|
82
|
+
const serverToolRenderer: ContentDataRenderer = {
|
83
|
+
name: "ServerTool",
|
84
|
+
canRender: (data: RenderableData) => data.type === "server_tool_use",
|
85
|
+
render: (data: RenderableData): ReactNode => {
|
86
|
+
return (
|
87
|
+
<>
|
88
|
+
<div
|
89
|
+
className={clsx(
|
90
|
+
"text-style-label",
|
91
|
+
"text-style-secondary",
|
92
|
+
"text-size-smaller",
|
93
|
+
)}
|
94
|
+
>
|
95
|
+
Server Tool
|
96
|
+
</div>
|
97
|
+
<RecordTree
|
98
|
+
id={data.name || "server-tool"}
|
99
|
+
record={data}
|
100
|
+
className={clsx(styles.data)}
|
101
|
+
/>
|
102
|
+
</>
|
103
|
+
);
|
104
|
+
},
|
105
|
+
};
|
106
|
+
|
107
|
+
export const contentDataRenderers: ContentDataRenderer[] = [
|
108
|
+
webSearchServerToolRenderer,
|
109
|
+
webSearchResultsServerToolRenderer,
|
110
|
+
serverToolRenderer,
|
111
|
+
];
|
@@ -0,0 +1,14 @@
|
|
1
|
+
import clsx from "clsx";
|
2
|
+
import { FC } from "react";
|
3
|
+
import styles from "./WebSearch.module.css";
|
4
|
+
|
5
|
+
export const WebSearch: FC<{ query: string }> = ({ query }) => {
|
6
|
+
return (
|
7
|
+
<div className={clsx(styles.webSearch, "text-size-smaller")}>
|
8
|
+
<span className={clsx("text-style-label", "text-style-secondary")}>
|
9
|
+
Web Search:
|
10
|
+
</span>
|
11
|
+
<span className={clsx(styles.query, "text-size-smallest")}>{query}</span>
|
12
|
+
</div>
|
13
|
+
);
|
14
|
+
};
|
@@ -0,0 +1,19 @@
|
|
1
|
+
.webSearch {
|
2
|
+
display: grid;
|
3
|
+
grid-template-columns: max-content 1fr;
|
4
|
+
column-gap: 0.5em;
|
5
|
+
align-items: baseline;
|
6
|
+
}
|
7
|
+
|
8
|
+
.query {
|
9
|
+
font-family: var(--bs-font-monospace);
|
10
|
+
}
|
11
|
+
|
12
|
+
.result a:hover {
|
13
|
+
text-decoration: underline;
|
14
|
+
}
|
15
|
+
|
16
|
+
.result a {
|
17
|
+
opacity: 0.8;
|
18
|
+
text-decoration: none;
|
19
|
+
}
|
@@ -0,0 +1,49 @@
|
|
1
|
+
import clsx from "clsx";
|
2
|
+
import { FC } from "react";
|
3
|
+
import styles from "./WebSearchResults.module.css";
|
4
|
+
|
5
|
+
export interface WebSearchContentData {
|
6
|
+
title: string;
|
7
|
+
url: string;
|
8
|
+
page_age: string;
|
9
|
+
}
|
10
|
+
|
11
|
+
export const WebSearchResults: FC<{ results: WebSearchContentData[] }> = ({
|
12
|
+
results,
|
13
|
+
}) => {
|
14
|
+
return (
|
15
|
+
<>
|
16
|
+
<div
|
17
|
+
className={clsx(
|
18
|
+
styles.label,
|
19
|
+
"text-style-label",
|
20
|
+
"text-style-secondary",
|
21
|
+
"text-size-smaller",
|
22
|
+
)}
|
23
|
+
>
|
24
|
+
Results
|
25
|
+
</div>
|
26
|
+
|
27
|
+
<ol className={clsx(styles.results, "text-size-smaller")}>
|
28
|
+
{results.map((result, index) => (
|
29
|
+
<li
|
30
|
+
key={index}
|
31
|
+
className={clsx(styles.result, "text-style-secondary")}
|
32
|
+
>
|
33
|
+
<a
|
34
|
+
href={result.url}
|
35
|
+
target="_blank"
|
36
|
+
rel="noopener noreferrer"
|
37
|
+
title={
|
38
|
+
result.url +
|
39
|
+
(result.page_age ? `\n(Age: ${result.page_age})` : "")
|
40
|
+
}
|
41
|
+
>
|
42
|
+
{result.title}
|
43
|
+
</a>
|
44
|
+
</li>
|
45
|
+
))}
|
46
|
+
</ol>
|
47
|
+
</>
|
48
|
+
);
|
49
|
+
};
|
@@ -4,6 +4,7 @@ import {
|
|
4
4
|
ChatMessageTool,
|
5
5
|
ChatMessageUser,
|
6
6
|
ContentAudio,
|
7
|
+
ContentData,
|
7
8
|
ContentImage,
|
8
9
|
ContentReasoning,
|
9
10
|
ContentText,
|
@@ -64,6 +65,7 @@ export const resolveMessages = (messages: Messages) => {
|
|
64
65
|
| ContentAudio
|
65
66
|
| ContentVideo
|
66
67
|
| ContentReasoning
|
68
|
+
| ContentData
|
67
69
|
)[] = [];
|
68
70
|
for (const systemMessage of systemMessages) {
|
69
71
|
const contents = Array.isArray(systemMessage.content)
|
@@ -78,6 +80,7 @@ export const resolveMessages = (messages: Messages) => {
|
|
78
80
|
content: systemContent,
|
79
81
|
source: "input",
|
80
82
|
internal: null,
|
83
|
+
metadata: null,
|
81
84
|
};
|
82
85
|
|
83
86
|
// Converge them
|
@@ -117,19 +120,22 @@ const normalizeContent = (
|
|
117
120
|
| ContentAudio
|
118
121
|
| ContentVideo
|
119
122
|
| ContentReasoning
|
123
|
+
| ContentData
|
120
124
|
| string,
|
121
125
|
):
|
122
126
|
| ContentText
|
123
127
|
| ContentImage
|
124
128
|
| ContentAudio
|
125
129
|
| ContentVideo
|
126
|
-
| ContentReasoning
|
130
|
+
| ContentReasoning
|
131
|
+
| ContentData => {
|
127
132
|
if (typeof content === "string") {
|
128
133
|
return {
|
129
134
|
type: "text",
|
130
135
|
text: content,
|
131
136
|
refusal: null,
|
132
137
|
internal: null,
|
138
|
+
citations: null,
|
133
139
|
};
|
134
140
|
} else {
|
135
141
|
return content;
|
@@ -2,6 +2,7 @@ import clsx from "clsx";
|
|
2
2
|
import { FC, useMemo } from "react";
|
3
3
|
import {
|
4
4
|
ContentAudio,
|
5
|
+
ContentData,
|
5
6
|
ContentImage,
|
6
7
|
ContentReasoning,
|
7
8
|
ContentText,
|
@@ -11,6 +12,7 @@ import {
|
|
11
12
|
import { ContentTool } from "../../../../app/types";
|
12
13
|
import ExpandablePanel from "../../../../components/ExpandablePanel";
|
13
14
|
import { MessageContent } from "../MessageContent";
|
15
|
+
import { defaultContext } from "../MessageContents";
|
14
16
|
import styles from "./ToolCallView.module.css";
|
15
17
|
import { ToolInput } from "./ToolInput";
|
16
18
|
import { ToolTitle } from "./ToolTitle";
|
@@ -31,6 +33,7 @@ interface ToolCallViewProps {
|
|
31
33
|
| ContentVideo
|
32
34
|
| ContentTool
|
33
35
|
| ContentReasoning
|
36
|
+
| ContentData
|
34
37
|
| (
|
35
38
|
| ContentText
|
36
39
|
| ContentAudio
|
@@ -38,6 +41,7 @@ interface ToolCallViewProps {
|
|
38
41
|
| ContentVideo
|
39
42
|
| ContentTool
|
40
43
|
| ContentReasoning
|
44
|
+
| ContentData
|
41
45
|
)[];
|
42
46
|
mode?: "compact";
|
43
47
|
}
|
@@ -65,7 +69,8 @@ export const ToolCallView: FC<ToolCallViewProps> = ({
|
|
65
69
|
| ContentImage
|
66
70
|
| ContentVideo
|
67
71
|
| ContentTool
|
68
|
-
| ContentReasoning
|
72
|
+
| ContentReasoning
|
73
|
+
| ContentData,
|
69
74
|
) {
|
70
75
|
if (value && typeof value === "object") {
|
71
76
|
if (value.type === "image") {
|
@@ -105,6 +110,7 @@ export const ToolCallView: FC<ToolCallViewProps> = ({
|
|
105
110
|
});
|
106
111
|
|
107
112
|
const contents = mode !== "compact" ? input : input || functionCall;
|
113
|
+
const context = defaultContext();
|
108
114
|
return (
|
109
115
|
<div className={clsx(styles.toolCallView)}>
|
110
116
|
<div>
|
@@ -127,7 +133,7 @@ export const ToolCallView: FC<ToolCallViewProps> = ({
|
|
127
133
|
lines={15}
|
128
134
|
className={clsx("text-size-small")}
|
129
135
|
>
|
130
|
-
<MessageContent contents={normalizedContent} />
|
136
|
+
<MessageContent contents={normalizedContent} context={context} />
|
131
137
|
</ExpandablePanel>
|
132
138
|
) : undefined}
|
133
139
|
</div>
|
@@ -148,6 +154,7 @@ const normalizeContent = (
|
|
148
154
|
| ContentVideo
|
149
155
|
| ContentTool
|
150
156
|
| ContentReasoning
|
157
|
+
| ContentData
|
151
158
|
| (
|
152
159
|
| ContentText
|
153
160
|
| ContentImage
|
@@ -155,6 +162,7 @@ const normalizeContent = (
|
|
155
162
|
| ContentVideo
|
156
163
|
| ContentTool
|
157
164
|
| ContentReasoning
|
165
|
+
| ContentData
|
158
166
|
)[],
|
159
167
|
): (
|
160
168
|
| ContentText
|
@@ -163,6 +171,7 @@ const normalizeContent = (
|
|
163
171
|
| ContentVideo
|
164
172
|
| ContentTool
|
165
173
|
| ContentReasoning
|
174
|
+
| ContentData
|
166
175
|
)[] => {
|
167
176
|
if (Array.isArray(output)) {
|
168
177
|
return output;
|
@@ -176,6 +185,7 @@ const normalizeContent = (
|
|
176
185
|
text: String(output),
|
177
186
|
refusal: null,
|
178
187
|
internal: null,
|
188
|
+
citations: null,
|
179
189
|
},
|
180
190
|
],
|
181
191
|
},
|
@@ -20,7 +20,7 @@ import clsx from "clsx";
|
|
20
20
|
import { useProperty, useSampleDescriptor } from "../../../state/hooks";
|
21
21
|
import { useVirtuosoState } from "../../../state/scrolling";
|
22
22
|
import { useStore } from "../../../state/store";
|
23
|
-
import { useSampleNavigation } from "../../routing/
|
23
|
+
import { useSampleNavigation } from "../../routing/sampleNavigation";
|
24
24
|
import { SampleFooter } from "./SampleFooter";
|
25
25
|
import { SampleHeader } from "./SampleHeader";
|
26
26
|
import styles from "./SampleList.module.css";
|
@@ -1,9 +1,9 @@
|
|
1
|
-
import {
|
1
|
+
import { Type21 } from "../../@types/log";
|
2
2
|
|
3
3
|
/**
|
4
4
|
* Formats a limit message
|
5
5
|
*/
|
6
|
-
export const sampleLimitMessage = (type:
|
6
|
+
export const sampleLimitMessage = (type: Type21): string => {
|
7
7
|
switch (type) {
|
8
8
|
case "operator":
|
9
9
|
return "Sample terminated due to operator limit.";
|
@@ -216,7 +216,7 @@ const ToolsConfig: FC<ToolConfigProps> = ({ tools, toolChoice }) => {
|
|
216
216
|
<div className={clsx(styles.toolConfig, "text-size-small")}>
|
217
217
|
{toolEls}
|
218
218
|
</div>
|
219
|
-
<div className={styles.toolChoice}>
|
219
|
+
<div className={clsx(styles.toolChoice, "text-size-small")}>
|
220
220
|
<div className={clsx("text-style-label", "text-style-secondary")}>
|
221
221
|
Tool Choice
|
222
222
|
</div>
|
@@ -1,6 +1,6 @@
|
|
1
1
|
import clsx from "clsx";
|
2
2
|
import { FC } from "react";
|
3
|
-
import { SampleLimitEvent,
|
3
|
+
import { SampleLimitEvent, Type15 } from "../../../@types/log";
|
4
4
|
import { ApplicationIcons } from "../../appearance/icons";
|
5
5
|
import { EventPanel } from "./event/EventPanel";
|
6
6
|
import { EventNode } from "./types";
|
@@ -17,12 +17,12 @@ export const SampleLimitEventView: FC<SampleLimitEventViewProps> = ({
|
|
17
17
|
eventNode,
|
18
18
|
className,
|
19
19
|
}) => {
|
20
|
-
const resolve_title = (type:
|
20
|
+
const resolve_title = (type: Type15) => {
|
21
21
|
switch (type) {
|
22
22
|
case "custom":
|
23
23
|
return "Custom Limit Exceeded";
|
24
24
|
case "time":
|
25
|
-
return "Time Limit
|
25
|
+
return "Time Limit Exceeded";
|
26
26
|
case "message":
|
27
27
|
return "Message Limit Exceeded";
|
28
28
|
case "token":
|
@@ -34,7 +34,7 @@ export const SampleLimitEventView: FC<SampleLimitEventViewProps> = ({
|
|
34
34
|
}
|
35
35
|
};
|
36
36
|
|
37
|
-
const resolve_icon = (type:
|
37
|
+
const resolve_icon = (type: Type15) => {
|
38
38
|
switch (type) {
|
39
39
|
case "custom":
|
40
40
|
return ApplicationIcons.limits.custom;
|
@@ -16,7 +16,7 @@ import { useScrollTrack, useVirtuosoState } from "../../../../state/scrolling";
|
|
16
16
|
import { useStore } from "../../../../state/store";
|
17
17
|
import { flatTree } from "../transform/treeify";
|
18
18
|
|
19
|
-
import { useSampleDetailNavigation } from "../../../routing/
|
19
|
+
import { useSampleDetailNavigation } from "../../../routing/sampleNavigation";
|
20
20
|
import { kSandboxSignalName } from "../transform/fixups";
|
21
21
|
import { OutlineRow } from "./OutlineRow";
|
22
22
|
import styles from "./TranscriptOutline.module.css";
|
@@ -51,8 +51,11 @@ export const MarkdownDiv = forwardRef<HTMLDivElement, MarkdownDivProps>(
|
|
51
51
|
// For `code` tags, reverse the escaping if we can
|
52
52
|
const withCode = unescapeCodeHtmlEntities(unescaped);
|
53
53
|
|
54
|
+
// For `sup` tags, reverse the escaping if we can
|
55
|
+
const withSup = unescapeSupHtmlEntities(withCode);
|
56
|
+
|
54
57
|
// Return the rendered markdown
|
55
|
-
const markup = { __html:
|
58
|
+
const markup = { __html: withSup };
|
56
59
|
|
57
60
|
return (
|
58
61
|
<div
|
@@ -65,7 +68,7 @@ export const MarkdownDiv = forwardRef<HTMLDivElement, MarkdownDivProps>(
|
|
65
68
|
},
|
66
69
|
);
|
67
70
|
|
68
|
-
const kLetterListPattern = /^([a-zA-
|
71
|
+
const kLetterListPattern = /^([a-zA-Z][).]\s.*?)$/gm;
|
69
72
|
const kCommonmarkReferenceLinkPattern = /\[([^\]]*)\]: (?!http)(.*)/g;
|
70
73
|
|
71
74
|
const protectBackslashesInLatex = (content: string): string => {
|
@@ -193,6 +196,16 @@ const unprotectMarkdown = (txt: string): string => {
|
|
193
196
|
return txt;
|
194
197
|
};
|
195
198
|
|
199
|
+
function unescapeSupHtmlEntities(str: string): string {
|
200
|
+
// replace &lt;sup&gt; with <sup>
|
201
|
+
if (!str) {
|
202
|
+
return str;
|
203
|
+
}
|
204
|
+
return str
|
205
|
+
.replace(/&lt;sup&gt;/g, "<sup>")
|
206
|
+
.replace(/&lt;\/sup&gt;/g, "</sup>");
|
207
|
+
}
|
208
|
+
|
196
209
|
function unescapeCodeHtmlEntities(str: string): string {
|
197
210
|
if (!str) return str;
|
198
211
|
|
@@ -5,8 +5,8 @@ This directory contains the test files for the application. The test framework i
|
|
5
5
|
## Directory Structure
|
6
6
|
|
7
7
|
- `tests/`: Root directory for all tests
|
8
|
-
|
9
|
-
|
8
|
+
- `__mocks__/`: Mock files for CSS modules and other assets
|
9
|
+
- `setupTests.mjs`: Setup file for Jest tests
|
10
10
|
|
11
11
|
## Running Tests
|
12
12
|
|
@@ -2,6 +2,8 @@
|
|
2
2
|
* Generates a GitHub commit URL based on the repository origin URL and the commit hash.
|
3
3
|
*/
|
4
4
|
export const ghCommitUrl = (origin: string, commit: string): string => {
|
5
|
-
const baseUrl = origin
|
5
|
+
const baseUrl = origin
|
6
|
+
.replace(/\.git$/, "")
|
7
|
+
.replace(/^git@github.com:/, "https://github.com/");
|
6
8
|
return `${baseUrl}/commit/${commit}`;
|
7
9
|
};
|
@@ -4,3 +4,9 @@
|
|
4
4
|
export function escapeSelector(id: string): string {
|
5
5
|
return id.replace(/([ #.;,?!+*~'":^$[\]()=>|/\\])/g, "\\$1");
|
6
6
|
}
|
7
|
+
|
8
|
+
export const decodeHtmlEntities = (text: string): string => {
|
9
|
+
const parser = new DOMParser();
|
10
|
+
const doc = parser.parseFromString(text, "text/html");
|
11
|
+
return doc.documentElement.textContent || text;
|
12
|
+
};
|
inspect_ai/agent/_handoff.py
CHANGED
@@ -37,9 +37,9 @@ def handoff(
|
|
37
37
|
Use the built-in `last_message` filter to return only the last message
|
38
38
|
or alternatively specify a custom `MessageFilter` function.
|
39
39
|
tool_name: Alternate tool name (defaults to `transfer_to_{agent_name}`)
|
40
|
-
limits: List of limits to apply to the agent.
|
41
|
-
the agent
|
42
|
-
exceeded.
|
40
|
+
limits: List of limits to apply to the agent. Limits are scoped to each
|
41
|
+
handoff to the agent. Should a limit be exceeded, the agent stops and a user
|
42
|
+
message is appended explaining that a limit was exceeded.
|
43
43
|
**agent_kwargs: Arguments to curry to `Agent` function (arguments provided here
|
44
44
|
will not be presented to the model as part of the tool interface).
|
45
45
|
|
inspect_ai/log/_condense.py
CHANGED
@@ -9,6 +9,7 @@ from inspect_ai._util.constants import BASE_64_DATA_REMOVED
|
|
9
9
|
from inspect_ai._util.content import (
|
10
10
|
Content,
|
11
11
|
ContentAudio,
|
12
|
+
ContentData,
|
12
13
|
ContentImage,
|
13
14
|
ContentReasoning,
|
14
15
|
ContentText,
|
@@ -344,3 +345,7 @@ def walk_content(content: Content, content_fn: Callable[[str], str]) -> Content:
|
|
344
345
|
return content.model_copy(update=dict(video=content_fn(content.video)))
|
345
346
|
elif isinstance(content, ContentReasoning):
|
346
347
|
return content.model_copy(update=dict(reasoning=content_fn(content.reasoning)))
|
348
|
+
elif isinstance(content, ContentData):
|
349
|
+
return content.model_copy(
|
350
|
+
update=dict(data=walk_json_value(content.data, content_fn))
|
351
|
+
)
|
inspect_ai/log/_file.py
CHANGED
@@ -198,7 +198,10 @@ def write_log_dir_manifest(
|
|
198
198
|
fs = filesystem(output_dir)
|
199
199
|
manifest = f"{output_dir}{fs.sep}{filename}"
|
200
200
|
manifest_json = to_json(
|
201
|
-
value=manifest_logs,
|
201
|
+
value=jsonable_python(manifest_logs),
|
202
|
+
indent=2,
|
203
|
+
exclude_none=True,
|
204
|
+
fallback=lambda _x: None,
|
202
205
|
)
|
203
206
|
with file(manifest, mode="wb", fs_options=fs_options) as f:
|
204
207
|
f.write(manifest_json)
|
inspect_ai/log/_log.py
CHANGED
@@ -422,7 +422,7 @@ class EvalSample(BaseModel):
|
|
422
422
|
# warning will handle this)
|
423
423
|
del values["transcript"]
|
424
424
|
|
425
|
-
return
|
425
|
+
return migrate_values(values)
|
426
426
|
|
427
427
|
# allow field model_usage
|
428
428
|
model_config = ConfigDict(protected_namespaces=())
|
@@ -707,7 +707,10 @@ class EvalSpec(BaseModel):
|
|
707
707
|
"""Attributes of the @task decorator."""
|
708
708
|
|
709
709
|
task_args: dict[str, Any] = Field(default_factory=dict)
|
710
|
-
"""Arguments used for invoking the task."""
|
710
|
+
"""Arguments used for invoking the task (including defaults)."""
|
711
|
+
|
712
|
+
task_args_passed: dict[str, Any] = Field(default_factory=dict)
|
713
|
+
"""Arguments explicitly passed by caller for invoking the task."""
|
711
714
|
|
712
715
|
solver: str | None = Field(default=None)
|
713
716
|
"""Solver name."""
|
@@ -782,16 +785,18 @@ class EvalSpec(BaseModel):
|
|
782
785
|
def read_sandbox_spec(
|
783
786
|
cls: Type["EvalSpec"], values: dict[str, Any]
|
784
787
|
) -> dict[str, Any]:
|
785
|
-
return
|
788
|
+
return migrate_values(values)
|
786
789
|
|
787
790
|
|
788
|
-
def
|
791
|
+
def migrate_values(values: dict[str, Any]) -> dict[str, Any]:
|
789
792
|
if "sandbox" in values:
|
790
793
|
sandbox = values.get("sandbox")
|
791
794
|
if isinstance(sandbox, list):
|
792
795
|
values["sandbox"] = SandboxEnvironmentSpec(
|
793
796
|
type=sandbox[0], config=sandbox[1]
|
794
797
|
)
|
798
|
+
if "task_args_passed" not in values:
|
799
|
+
values["task_args_passed"] = values.get("task_args", {})
|
795
800
|
return values
|
796
801
|
|
797
802
|
|
@@ -3,6 +3,7 @@ from typing import Any, Literal, get_args
|
|
3
3
|
|
4
4
|
import ijson # type: ignore
|
5
5
|
from ijson import IncompleteJSONError
|
6
|
+
from ijson.backends.python import UnexpectedSymbol # type: ignore
|
6
7
|
from pydantic import BaseModel
|
7
8
|
from pydantic_core import from_json
|
8
9
|
from typing_extensions import override
|
@@ -129,12 +130,13 @@ class JSONRecorder(FileRecorder):
|
|
129
130
|
# The Python JSON serializer supports NaN and Inf, however
|
130
131
|
# this isn't technically part of the JSON spec. The json-stream
|
131
132
|
# library shares this limitation, so if we fail with an
|
132
|
-
# invalid character then we move on and and parse w/ pydantic
|
133
|
+
# invalid character (or Unexpected symbol) then we move on and and parse w/ pydantic
|
133
134
|
# (which does support NaN and Inf by default)
|
134
|
-
except (ValueError, IncompleteJSONError) as ex:
|
135
|
+
except (ValueError, IncompleteJSONError, UnexpectedSymbol) as ex:
|
135
136
|
if (
|
136
137
|
str(ex).find("Invalid JSON character") != -1
|
137
138
|
or str(ex).find("invalid char in json text") != -1
|
139
|
+
or str(ex).find("Unexpected symbol") != -1
|
138
140
|
):
|
139
141
|
pass
|
140
142
|
else:
|
inspect_ai/log/_util.py
CHANGED
@@ -4,6 +4,7 @@ from typing import Any
|
|
4
4
|
|
5
5
|
from inspect_ai._util.content import (
|
6
6
|
ContentAudio,
|
7
|
+
ContentData,
|
7
8
|
ContentImage,
|
8
9
|
ContentReasoning,
|
9
10
|
ContentText,
|
@@ -24,6 +25,7 @@ def text_input_only(inputs: str | list[ChatMessage]) -> str | list[ChatMessage]:
|
|
24
25
|
| ContentImage
|
25
26
|
| ContentAudio
|
26
27
|
| ContentVideo
|
28
|
+
| ContentData
|
27
29
|
] = []
|
28
30
|
for content in message.content:
|
29
31
|
if content.type == "text":
|
inspect_ai/model/__init__.py
CHANGED
@@ -1,8 +1,16 @@
|
|
1
1
|
# ruff: noqa: F401 F403 F405
|
2
2
|
|
3
|
+
from inspect_ai._util.citation import (
|
4
|
+
Citation,
|
5
|
+
CitationBase,
|
6
|
+
ContentCitation,
|
7
|
+
DocumentCitation,
|
8
|
+
UrlCitation,
|
9
|
+
)
|
3
10
|
from inspect_ai._util.content import (
|
4
11
|
Content,
|
5
12
|
ContentAudio,
|
13
|
+
ContentData,
|
6
14
|
ContentImage,
|
7
15
|
ContentReasoning,
|
8
16
|
ContentText,
|
@@ -59,6 +67,7 @@ __all__ = [
|
|
59
67
|
"ResponseSchema",
|
60
68
|
"CachePolicy",
|
61
69
|
"ContentAudio",
|
70
|
+
"ContentData",
|
62
71
|
"ContentImage",
|
63
72
|
"ContentReasoning",
|
64
73
|
"ContentText",
|
@@ -93,6 +102,11 @@ __all__ = [
|
|
93
102
|
"cache_size",
|
94
103
|
"get_model",
|
95
104
|
"modelapi",
|
105
|
+
"Citation",
|
106
|
+
"CitationBase",
|
107
|
+
"DocumentCitation",
|
108
|
+
"ContentCitation",
|
109
|
+
"UrlCitation",
|
96
110
|
]
|
97
111
|
|
98
112
|
_TOOL_MODULE_VERSION = "0.3.18"
|