inspect-ai 0.3.102__py3-none-any.whl → 0.3.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. inspect_ai/_cli/common.py +2 -1
  2. inspect_ai/_cli/eval.py +2 -1
  3. inspect_ai/_display/core/active.py +3 -0
  4. inspect_ai/_display/core/config.py +1 -0
  5. inspect_ai/_display/core/panel.py +21 -13
  6. inspect_ai/_display/core/results.py +3 -7
  7. inspect_ai/_display/core/rich.py +3 -5
  8. inspect_ai/_display/log/__init__.py +0 -0
  9. inspect_ai/_display/log/display.py +173 -0
  10. inspect_ai/_display/plain/display.py +2 -2
  11. inspect_ai/_display/rich/display.py +2 -4
  12. inspect_ai/_display/textual/app.py +1 -6
  13. inspect_ai/_display/textual/widgets/task_detail.py +3 -14
  14. inspect_ai/_display/textual/widgets/tasks.py +1 -1
  15. inspect_ai/_eval/eval.py +14 -2
  16. inspect_ai/_eval/evalset.py +3 -2
  17. inspect_ai/_eval/registry.py +6 -1
  18. inspect_ai/_eval/run.py +7 -1
  19. inspect_ai/_eval/task/constants.py +1 -0
  20. inspect_ai/_eval/task/log.py +5 -1
  21. inspect_ai/_eval/task/run.py +1 -1
  22. inspect_ai/_util/citation.py +88 -0
  23. inspect_ai/_util/content.py +24 -2
  24. inspect_ai/_util/json.py +17 -2
  25. inspect_ai/_util/registry.py +19 -4
  26. inspect_ai/_view/schema.py +0 -6
  27. inspect_ai/_view/www/dist/assets/index.css +82 -24
  28. inspect_ai/_view/www/dist/assets/index.js +10124 -9808
  29. inspect_ai/_view/www/log-schema.json +418 -1
  30. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  31. inspect_ai/_view/www/node_modules/katex/src/fonts/generate_fonts.py +58 -0
  32. inspect_ai/_view/www/node_modules/katex/src/metrics/extract_tfms.py +114 -0
  33. inspect_ai/_view/www/node_modules/katex/src/metrics/extract_ttfs.py +122 -0
  34. inspect_ai/_view/www/node_modules/katex/src/metrics/format_json.py +28 -0
  35. inspect_ai/_view/www/node_modules/katex/src/metrics/parse_tfm.py +211 -0
  36. inspect_ai/_view/www/package.json +2 -2
  37. inspect_ai/_view/www/src/@types/log.d.ts +140 -39
  38. inspect_ai/_view/www/src/app/content/RecordTree.tsx +13 -0
  39. inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -1
  40. inspect_ai/_view/www/src/app/routing/logNavigation.ts +31 -0
  41. inspect_ai/_view/www/src/app/routing/{navigationHooks.ts → sampleNavigation.ts} +39 -86
  42. inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +1 -1
  43. inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +1 -1
  44. inspect_ai/_view/www/src/app/samples/chat/MessageCitations.module.css +16 -0
  45. inspect_ai/_view/www/src/app/samples/chat/MessageCitations.tsx +63 -0
  46. inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +6 -0
  47. inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +174 -25
  48. inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +21 -3
  49. inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.module.css +7 -0
  50. inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.tsx +111 -0
  51. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.module.css +10 -0
  52. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.tsx +14 -0
  53. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.module.css +19 -0
  54. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.tsx +49 -0
  55. inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -1
  56. inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +12 -2
  57. inspect_ai/_view/www/src/app/samples/chat/types.ts +4 -0
  58. inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +1 -1
  59. inspect_ai/_view/www/src/app/samples/sampleLimit.ts +2 -2
  60. inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
  61. inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +4 -4
  62. inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +1 -1
  63. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +15 -2
  64. inspect_ai/_view/www/src/tests/README.md +2 -2
  65. inspect_ai/_view/www/src/utils/git.ts +3 -1
  66. inspect_ai/_view/www/src/utils/html.ts +6 -0
  67. inspect_ai/agent/_handoff.py +3 -3
  68. inspect_ai/log/_condense.py +5 -0
  69. inspect_ai/log/_file.py +4 -1
  70. inspect_ai/log/_log.py +9 -4
  71. inspect_ai/log/_recorders/eval.py +4 -3
  72. inspect_ai/log/_recorders/json.py +5 -2
  73. inspect_ai/log/_recorders/recorder.py +1 -0
  74. inspect_ai/log/_util.py +2 -0
  75. inspect_ai/model/__init__.py +14 -0
  76. inspect_ai/model/_call_tools.py +13 -4
  77. inspect_ai/model/_chat_message.py +3 -0
  78. inspect_ai/model/_openai_responses.py +80 -34
  79. inspect_ai/model/_providers/_anthropic_citations.py +158 -0
  80. inspect_ai/model/_providers/_google_citations.py +100 -0
  81. inspect_ai/model/_providers/anthropic.py +196 -34
  82. inspect_ai/model/_providers/google.py +94 -22
  83. inspect_ai/model/_providers/mistral.py +20 -7
  84. inspect_ai/model/_providers/openai.py +11 -10
  85. inspect_ai/model/_providers/openai_compatible.py +3 -2
  86. inspect_ai/model/_providers/openai_responses.py +2 -5
  87. inspect_ai/model/_providers/perplexity.py +123 -0
  88. inspect_ai/model/_providers/providers.py +13 -2
  89. inspect_ai/model/_providers/vertex.py +3 -0
  90. inspect_ai/model/_trim.py +5 -0
  91. inspect_ai/tool/__init__.py +14 -0
  92. inspect_ai/tool/_mcp/_mcp.py +5 -2
  93. inspect_ai/tool/_mcp/sampling.py +19 -3
  94. inspect_ai/tool/_mcp/server.py +1 -1
  95. inspect_ai/tool/_tool.py +10 -1
  96. inspect_ai/tool/_tools/_web_search/_base_http_provider.py +104 -0
  97. inspect_ai/tool/_tools/_web_search/_exa.py +78 -0
  98. inspect_ai/tool/_tools/_web_search/_google.py +22 -25
  99. inspect_ai/tool/_tools/_web_search/_tavily.py +47 -65
  100. inspect_ai/tool/_tools/_web_search/_web_search.py +83 -36
  101. inspect_ai/tool/_tools/_web_search/_web_search_provider.py +7 -0
  102. inspect_ai/util/_display.py +11 -2
  103. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  104. inspect_ai/util/_span.py +12 -1
  105. {inspect_ai-0.3.102.dist-info → inspect_ai-0.3.104.dist-info}/METADATA +2 -2
  106. {inspect_ai-0.3.102.dist-info → inspect_ai-0.3.104.dist-info}/RECORD +112 -88
  107. /inspect_ai/model/{_openai_computer_use.py → _providers/_openai_computer_use.py} +0 -0
  108. /inspect_ai/model/{_openai_web_search.py → _providers/_openai_web_search.py} +0 -0
  109. {inspect_ai-0.3.102.dist-info → inspect_ai-0.3.104.dist-info}/WHEEL +0 -0
  110. {inspect_ai-0.3.102.dist-info → inspect_ai-0.3.104.dist-info}/entry_points.txt +0 -0
  111. {inspect_ai-0.3.102.dist-info → inspect_ai-0.3.104.dist-info}/licenses/LICENSE +0 -0
  112. {inspect_ai-0.3.102.dist-info → inspect_ai-0.3.104.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,111 @@
1
import { FC, ReactNode } from "react";
import { WebSearch } from "./WebSearch";

import clsx from "clsx";
import { ContentData } from "../../../../@types/log";
import { RecordTree } from "../../../content/RecordTree";
import styles from "./ContentDataView.module.css";
import { WebSearchContentData, WebSearchResults } from "./WebSearchResults";

/** Props for `ContentDataView`. */
export interface ContentDataProps {
  /** Prefix used to build the id of the fallback record tree. */
  id: string;
  /** Content item whose `data` payload is rendered. */
  contentData: ContentData;
}

/**
 * Loose view of a content data payload: a `type` discriminator, an optional
 * `name`, and arbitrary additional fields.
 */
interface RenderableData {
  type: string;
  name?: string;
  [key: string]: any;
}

/**
 * Renders a content data payload using the first renderer in
 * `contentDataRenderers` that accepts it. When none does, the payload is
 * shown as a record tree with its `encrypted_content` field left out.
 */
export const ContentDataView: FC<ContentDataProps> = ({ id, contentData }) => {
  const renderableData = contentData.data as RenderableData;

  const renderer = contentDataRenderers.find((r) =>
    r.canRender(renderableData),
  );

  if (!renderer) {
    // Destructure only to drop `encrypted_content` from the displayed record.
    const { encrypted_content, ...record } = renderableData;
    return (
      <div className={clsx(styles.contentData)}>
        <RecordTree
          id={`${id}-tree`}
          record={record}
          className={clsx(styles.data)}
          defaultExpandLevel={0}
        />
      </div>
    );
  }

  return (
    <div className={clsx(styles.contentData)}>
      {renderer.render(renderableData)}
    </div>
  );
};

// The following handles rendering of the content data based on its type
// and name, allowing for different renderers to be used for different types of content data.

interface ContentDataRenderer {
  /** Human readable renderer name. */
  name: string;
  /** Returns true when this renderer can display `data`. */
  canRender: (data: RenderableData) => boolean;
  /** Produces the UI for `data`; only called after `canRender` returned true. */
  render: (data: RenderableData) => ReactNode;
}

const webSearchServerToolRenderer: ContentDataRenderer = {
  name: "WebSearch",
  canRender: (data: RenderableData) => {
    // Require a string query up front: `render` dereferences
    // `data.input.query`, so a payload without it must fall through to the
    // generic server tool renderer instead of throwing during render.
    return (
      data.type === "server_tool_use" &&
      data.name === "web_search" &&
      data.input != null &&
      typeof data.input.query === "string"
    );
  },
  render: (data: RenderableData): ReactNode => {
    return <WebSearch query={data.input.query} />;
  },
};

const webSearchResultsServerToolRenderer: ContentDataRenderer = {
  name: "WebSearchResults",
  canRender: (data: RenderableData) => {
    return (
      data.type === "web_search_tool_result" && Array.isArray(data.content)
    );
  },
  render: (data: RenderableData): ReactNode => {
    const results: WebSearchContentData[] =
      data.content as WebSearchContentData[];
    return <WebSearchResults results={results} />;
  },
};

const serverToolRenderer: ContentDataRenderer = {
  name: "ServerTool",
  canRender: (data: RenderableData) => data.type === "server_tool_use",
  render: (data: RenderableData): ReactNode => {
    return (
      <>
        <div
          className={clsx(
            "text-style-label",
            "text-style-secondary",
            "text-size-smaller",
          )}
        >
          Server Tool
        </div>
        <RecordTree
          id={data.name || "server-tool"}
          record={data}
          className={clsx(styles.data)}
        />
      </>
    );
  },
};

// Order matters: `ContentDataView` picks the first renderer whose `canRender`
// returns true, so the web search renderers must precede the generic
// server tool renderer.
export const contentDataRenderers: ContentDataRenderer[] = [
  webSearchServerToolRenderer,
  webSearchResultsServerToolRenderer,
  serverToolRenderer,
];
@@ -0,0 +1,10 @@
1
/* Two-column grid: the first column is sized to its content, the second takes the remaining width. */
.webSearch {
  display: grid;
  align-items: baseline;
  column-gap: 0.5em;
  grid-template-columns: max-content 1fr;
}

/* Query text uses the font from the --bs-font-monospace custom property. */
.query {
  font-family: var(--bs-font-monospace);
}
@@ -0,0 +1,14 @@
1
import clsx from "clsx";
import { FC } from "react";
import styles from "./WebSearch.module.css";

interface WebSearchProps {
  /** Query text displayed after the "Web Search:" label. */
  query: string;
}

/** Renders a "Web Search:" label followed by the query text. */
export const WebSearch: FC<WebSearchProps> = ({ query }) => {
  const containerClass = clsx(styles.webSearch, "text-size-smaller");
  const labelClass = clsx("text-style-label", "text-style-secondary");
  const queryClass = clsx(styles.query, "text-size-smallest");

  return (
    <div className={containerClass}>
      <span className={labelClass}>Web Search:</span>
      <span className={queryClass}>{query}</span>
    </div>
  );
};
@@ -0,0 +1,19 @@
1
/* Two-column grid: the first column is sized to its content, the second takes the remaining width. */
.webSearch {
  display: grid;
  align-items: baseline;
  column-gap: 0.5em;
  grid-template-columns: max-content 1fr;
}

/* Query text uses the font from the --bs-font-monospace custom property. */
.query {
  font-family: var(--bs-font-monospace);
}

/* Result links are slightly faded and not underlined by default... */
.result a {
  text-decoration: none;
  opacity: 0.8;
}

/* ...and underlined while hovered. */
.result a:hover {
  text-decoration: underline;
}
@@ -0,0 +1,49 @@
1
import clsx from "clsx";
import { FC } from "react";
import styles from "./WebSearchResults.module.css";

/** One entry of a web search result list. */
export interface WebSearchContentData {
  title: string;
  url: string;
  page_age: string;
}

// Only these URL schemes are turned into clickable links.
const kLinkableProtocols = ["http:", "https:"];

/**
 * Returns `url` when it is an absolute http(s) URL, otherwise `undefined`.
 *
 * Result URLs come from logged provider data, so anything else (for example
 * a `javascript:` URL, or a string that does not parse as a URL) is rendered
 * without an `href` rather than as a clickable link.
 */
const linkableHref = (url: string): string | undefined => {
  try {
    return kLinkableProtocols.includes(new URL(url).protocol)
      ? url
      : undefined;
  } catch {
    // `new URL` throws for strings that are not absolute URLs.
    return undefined;
  }
};

/**
 * Renders a "Results" label followed by an ordered list of result links.
 * Each link shows the result title (falling back to the URL when the title
 * is empty) and carries the URL plus, when present, the page age as its
 * tooltip.
 */
export const WebSearchResults: FC<{ results: WebSearchContentData[] }> = ({
  results,
}) => {
  return (
    <>
      <div
        className={clsx(
          styles.label,
          "text-style-label",
          "text-style-secondary",
          "text-size-smaller",
        )}
      >
        Results
      </div>

      <ol className={clsx(styles.results, "text-size-smaller")}>
        {results.map((result, index) => (
          <li
            key={index}
            className={clsx(styles.result, "text-style-secondary")}
          >
            <a
              href={linkableHref(result.url)}
              target="_blank"
              rel="noopener noreferrer"
              title={
                result.url +
                (result.page_age ? `\n(Age: ${result.page_age})` : "")
              }
            >
              {result.title || result.url}
            </a>
          </li>
        ))}
      </ol>
    </>
  );
};
@@ -4,6 +4,7 @@ import {
4
4
  ChatMessageTool,
5
5
  ChatMessageUser,
6
6
  ContentAudio,
7
+ ContentData,
7
8
  ContentImage,
8
9
  ContentReasoning,
9
10
  ContentText,
@@ -64,6 +65,7 @@ export const resolveMessages = (messages: Messages) => {
64
65
  | ContentAudio
65
66
  | ContentVideo
66
67
  | ContentReasoning
68
+ | ContentData
67
69
  )[] = [];
68
70
  for (const systemMessage of systemMessages) {
69
71
  const contents = Array.isArray(systemMessage.content)
@@ -78,6 +80,7 @@ export const resolveMessages = (messages: Messages) => {
78
80
  content: systemContent,
79
81
  source: "input",
80
82
  internal: null,
83
+ metadata: null,
81
84
  };
82
85
 
83
86
  // Converge them
@@ -117,19 +120,22 @@ const normalizeContent = (
117
120
  | ContentAudio
118
121
  | ContentVideo
119
122
  | ContentReasoning
123
+ | ContentData
120
124
  | string,
121
125
  ):
122
126
  | ContentText
123
127
  | ContentImage
124
128
  | ContentAudio
125
129
  | ContentVideo
126
- | ContentReasoning => {
130
+ | ContentReasoning
131
+ | ContentData => {
127
132
  if (typeof content === "string") {
128
133
  return {
129
134
  type: "text",
130
135
  text: content,
131
136
  refusal: null,
132
137
  internal: null,
138
+ citations: null,
133
139
  };
134
140
  } else {
135
141
  return content;
@@ -2,6 +2,7 @@ import clsx from "clsx";
2
2
  import { FC, useMemo } from "react";
3
3
  import {
4
4
  ContentAudio,
5
+ ContentData,
5
6
  ContentImage,
6
7
  ContentReasoning,
7
8
  ContentText,
@@ -11,6 +12,7 @@ import {
11
12
  import { ContentTool } from "../../../../app/types";
12
13
  import ExpandablePanel from "../../../../components/ExpandablePanel";
13
14
  import { MessageContent } from "../MessageContent";
15
+ import { defaultContext } from "../MessageContents";
14
16
  import styles from "./ToolCallView.module.css";
15
17
  import { ToolInput } from "./ToolInput";
16
18
  import { ToolTitle } from "./ToolTitle";
@@ -31,6 +33,7 @@ interface ToolCallViewProps {
31
33
  | ContentVideo
32
34
  | ContentTool
33
35
  | ContentReasoning
36
+ | ContentData
34
37
  | (
35
38
  | ContentText
36
39
  | ContentAudio
@@ -38,6 +41,7 @@ interface ToolCallViewProps {
38
41
  | ContentVideo
39
42
  | ContentTool
40
43
  | ContentReasoning
44
+ | ContentData
41
45
  )[];
42
46
  mode?: "compact";
43
47
  }
@@ -65,7 +69,8 @@ export const ToolCallView: FC<ToolCallViewProps> = ({
65
69
  | ContentImage
66
70
  | ContentVideo
67
71
  | ContentTool
68
- | ContentReasoning,
72
+ | ContentReasoning
73
+ | ContentData,
69
74
  ) {
70
75
  if (value && typeof value === "object") {
71
76
  if (value.type === "image") {
@@ -105,6 +110,7 @@ export const ToolCallView: FC<ToolCallViewProps> = ({
105
110
  });
106
111
 
107
112
  const contents = mode !== "compact" ? input : input || functionCall;
113
+ const context = defaultContext();
108
114
  return (
109
115
  <div className={clsx(styles.toolCallView)}>
110
116
  <div>
@@ -127,7 +133,7 @@ export const ToolCallView: FC<ToolCallViewProps> = ({
127
133
  lines={15}
128
134
  className={clsx("text-size-small")}
129
135
  >
130
- <MessageContent contents={normalizedContent} />
136
+ <MessageContent contents={normalizedContent} context={context} />
131
137
  </ExpandablePanel>
132
138
  ) : undefined}
133
139
  </div>
@@ -148,6 +154,7 @@ const normalizeContent = (
148
154
  | ContentVideo
149
155
  | ContentTool
150
156
  | ContentReasoning
157
+ | ContentData
151
158
  | (
152
159
  | ContentText
153
160
  | ContentImage
@@ -155,6 +162,7 @@ const normalizeContent = (
155
162
  | ContentVideo
156
163
  | ContentTool
157
164
  | ContentReasoning
165
+ | ContentData
158
166
  )[],
159
167
  ): (
160
168
  | ContentText
@@ -163,6 +171,7 @@ const normalizeContent = (
163
171
  | ContentVideo
164
172
  | ContentTool
165
173
  | ContentReasoning
174
+ | ContentData
166
175
  )[] => {
167
176
  if (Array.isArray(output)) {
168
177
  return output;
@@ -176,6 +185,7 @@ const normalizeContent = (
176
185
  text: String(output),
177
186
  refusal: null,
178
187
  internal: null,
188
+ citations: null,
179
189
  },
180
190
  ],
181
191
  },
@@ -1 +1,5 @@
1
+ import { Citations } from "../../../@types/log";
2
+
1
3
  export type ChatViewToolCallStyle = "compact" | "complete" | "omit";
4
+
5
+ export type Citation = NonNullable<Citations>[number];
@@ -20,7 +20,7 @@ import clsx from "clsx";
20
20
  import { useProperty, useSampleDescriptor } from "../../../state/hooks";
21
21
  import { useVirtuosoState } from "../../../state/scrolling";
22
22
  import { useStore } from "../../../state/store";
23
- import { useSampleNavigation } from "../../routing/navigationHooks";
23
+ import { useSampleNavigation } from "../../routing/sampleNavigation";
24
24
  import { SampleFooter } from "./SampleFooter";
25
25
  import { SampleHeader } from "./SampleHeader";
26
26
  import styles from "./SampleList.module.css";
@@ -1,9 +1,9 @@
1
- import { Type14 } from "../../@types/log";
1
+ import { Type21 } from "../../@types/log";
2
2
 
3
3
  /**
4
4
  * Formats a limit message
5
5
  */
6
- export const sampleLimitMessage = (type: Type14): string => {
6
+ export const sampleLimitMessage = (type: Type21): string => {
7
7
  switch (type) {
8
8
  case "operator":
9
9
  return "Sample terminated due to operator limit.";
@@ -216,7 +216,7 @@ const ToolsConfig: FC<ToolConfigProps> = ({ tools, toolChoice }) => {
216
216
  <div className={clsx(styles.toolConfig, "text-size-small")}>
217
217
  {toolEls}
218
218
  </div>
219
- <div className={styles.toolChoice}>
219
+ <div className={clsx(styles.toolChoice, "text-size-small")}>
220
220
  <div className={clsx("text-style-label", "text-style-secondary")}>
221
221
  Tool Choice
222
222
  </div>
@@ -1,6 +1,6 @@
1
1
  import clsx from "clsx";
2
2
  import { FC } from "react";
3
- import { SampleLimitEvent, Type10 } from "../../../@types/log";
3
+ import { SampleLimitEvent, Type15 } from "../../../@types/log";
4
4
  import { ApplicationIcons } from "../../appearance/icons";
5
5
  import { EventPanel } from "./event/EventPanel";
6
6
  import { EventNode } from "./types";
@@ -17,12 +17,12 @@ export const SampleLimitEventView: FC<SampleLimitEventViewProps> = ({
17
17
  eventNode,
18
18
  className,
19
19
  }) => {
20
- const resolve_title = (type: Type10) => {
20
+ const resolve_title = (type: Type15) => {
21
21
  switch (type) {
22
22
  case "custom":
23
23
  return "Custom Limit Exceeded";
24
24
  case "time":
25
- return "Time Limit Execeeded";
25
+ return "Time Limit Exceeded";
26
26
  case "message":
27
27
  return "Message Limit Exceeded";
28
28
  case "token":
@@ -34,7 +34,7 @@ export const SampleLimitEventView: FC<SampleLimitEventViewProps> = ({
34
34
  }
35
35
  };
36
36
 
37
- const resolve_icon = (type: Type10) => {
37
+ const resolve_icon = (type: Type15) => {
38
38
  switch (type) {
39
39
  case "custom":
40
40
  return ApplicationIcons.limits.custom;
@@ -16,7 +16,7 @@ import { useScrollTrack, useVirtuosoState } from "../../../../state/scrolling";
16
16
  import { useStore } from "../../../../state/store";
17
17
  import { flatTree } from "../transform/treeify";
18
18
 
19
- import { useSampleDetailNavigation } from "../../../routing/navigationHooks";
19
+ import { useSampleDetailNavigation } from "../../../routing/sampleNavigation";
20
20
  import { kSandboxSignalName } from "../transform/fixups";
21
21
  import { OutlineRow } from "./OutlineRow";
22
22
  import styles from "./TranscriptOutline.module.css";
@@ -51,8 +51,11 @@ export const MarkdownDiv = forwardRef<HTMLDivElement, MarkdownDivProps>(
51
51
  // For `code` tags, reverse the escaping if we can
52
52
  const withCode = unescapeCodeHtmlEntities(unescaped);
53
53
 
54
+ // For `sup` tags, reverse the escaping if we can
55
+ const withSup = unescapeSupHtmlEntities(withCode);
56
+
54
57
  // Return the rendered markdown
55
- const markup = { __html: withCode };
58
+ const markup = { __html: withSup };
56
59
 
57
60
  return (
58
61
  <div
@@ -65,7 +68,7 @@ export const MarkdownDiv = forwardRef<HTMLDivElement, MarkdownDivProps>(
65
68
  },
66
69
  );
67
70
 
68
- const kLetterListPattern = /^([a-zA-Z0-9][).]\s.*?)$/gm;
71
+ const kLetterListPattern = /^([a-zA-Z][).]\s.*?)$/gm;
69
72
  const kCommonmarkReferenceLinkPattern = /\[([^\]]*)\]: (?!http)(.*)/g;
70
73
 
71
74
  const protectBackslashesInLatex = (content: string): string => {
@@ -193,6 +196,16 @@ const unprotectMarkdown = (txt: string): string => {
193
196
  return txt;
194
197
  };
195
198
 
199
/**
 * Turns escaped superscript tags back into markup: `&lt;sup&gt;` becomes
 * `<sup>` and `&lt;/sup&gt;` becomes `</sup>`. Empty input is returned as is.
 */
function unescapeSupHtmlEntities(str: string): string {
  // The optional captured slash tells opening and closing tags apart, so a
  // single pass handles both.
  return str ? str.replace(/&lt;(\/?)sup&gt;/g, "<$1sup>") : str;
}
208
+
196
209
  function unescapeCodeHtmlEntities(str: string): string {
197
210
  if (!str) return str;
198
211
 
@@ -5,8 +5,8 @@ This directory contains the test files for the application. The test framework i
5
5
  ## Directory Structure
6
6
 
7
7
  - `tests/`: Root directory for all tests
8
- - `__mocks__/`: Mock files for CSS modules and other assets
9
- - `setupTests.mjs`: Setup file for Jest tests
8
+ - `__mocks__/`: Mock files for CSS modules and other assets
9
+ - `setupTests.mjs`: Setup file for Jest tests
10
10
 
11
11
  ## Running Tests
12
12
 
@@ -2,6 +2,8 @@
2
2
  * Generates a GitHub commit URL based on the repository origin URL and the commit hash.
3
3
  */
4
4
  export const ghCommitUrl = (origin: string, commit: string): string => {
5
- const baseUrl = origin.replace(/\.git$/, "");
5
+ const baseUrl = origin
6
+ .replace(/\.git$/, "")
7
+ .replace(/^git@github.com:/, "https://github.com/");
6
8
  return `${baseUrl}/commit/${commit}`;
7
9
  };
@@ -4,3 +4,9 @@
4
4
  export function escapeSelector(id: string): string {
5
5
  return id.replace(/([ #.;,?!+*~'":^$[\]()=>|/\\])/g, "\\$1");
6
6
  }
7
+
8
/**
 * Decodes HTML entities by parsing `text` as an HTML document and reading
 * back the document's text content. Falls back to the input when the
 * parsed text content is empty.
 */
export const decodeHtmlEntities = (text: string): string => {
  const parsed = new DOMParser().parseFromString(text, "text/html");
  const decoded = parsed.documentElement.textContent;
  return decoded ? decoded : text;
};
@@ -37,9 +37,9 @@ def handoff(
37
37
  Use the built-in `last_message` filter to return only the last message
38
38
  or alternatively specify a custom `MessageFilter` function.
39
39
  tool_name: Alternate tool name (defaults to `transfer_to_{agent_name}`)
40
- limits: List of limits to apply to the agent. Should a limit be exceeded,
41
- the agent stops and a user message is appended explaining that a limit was
42
- exceeded.
40
+ limits: List of limits to apply to the agent. Limits are scoped to each
41
+ handoff to the agent. Should a limit be exceeded, the agent stops and a user
42
+ message is appended explaining that a limit was exceeded.
43
43
  **agent_kwargs: Arguments to curry to `Agent` function (arguments provided here
44
44
  will not be presented to the model as part of the tool interface).
45
45
 
@@ -9,6 +9,7 @@ from inspect_ai._util.constants import BASE_64_DATA_REMOVED
9
9
  from inspect_ai._util.content import (
10
10
  Content,
11
11
  ContentAudio,
12
+ ContentData,
12
13
  ContentImage,
13
14
  ContentReasoning,
14
15
  ContentText,
@@ -344,3 +345,7 @@ def walk_content(content: Content, content_fn: Callable[[str], str]) -> Content:
344
345
  return content.model_copy(update=dict(video=content_fn(content.video)))
345
346
  elif isinstance(content, ContentReasoning):
346
347
  return content.model_copy(update=dict(reasoning=content_fn(content.reasoning)))
348
+ elif isinstance(content, ContentData):
349
+ return content.model_copy(
350
+ update=dict(data=walk_json_value(content.data, content_fn))
351
+ )
inspect_ai/log/_file.py CHANGED
@@ -198,7 +198,10 @@ def write_log_dir_manifest(
198
198
  fs = filesystem(output_dir)
199
199
  manifest = f"{output_dir}{fs.sep}{filename}"
200
200
  manifest_json = to_json(
201
- value=manifest_logs, indent=2, exclude_none=True, fallback=lambda _x: None
201
+ value=jsonable_python(manifest_logs),
202
+ indent=2,
203
+ exclude_none=True,
204
+ fallback=lambda _x: None,
202
205
  )
203
206
  with file(manifest, mode="wb", fs_options=fs_options) as f:
204
207
  f.write(manifest_json)
inspect_ai/log/_log.py CHANGED
@@ -422,7 +422,7 @@ class EvalSample(BaseModel):
422
422
  # warning will handle this)
423
423
  del values["transcript"]
424
424
 
425
- return migrate_sandbox_spec(values)
425
+ return migrate_values(values)
426
426
 
427
427
  # allow field model_usage
428
428
  model_config = ConfigDict(protected_namespaces=())
@@ -707,7 +707,10 @@ class EvalSpec(BaseModel):
707
707
  """Attributes of the @task decorator."""
708
708
 
709
709
  task_args: dict[str, Any] = Field(default_factory=dict)
710
- """Arguments used for invoking the task."""
710
+ """Arguments used for invoking the task (including defaults)."""
711
+
712
+ task_args_passed: dict[str, Any] = Field(default_factory=dict)
713
+ """Arguments explicitly passed by caller for invoking the task."""
711
714
 
712
715
  solver: str | None = Field(default=None)
713
716
  """Solver name."""
@@ -782,16 +785,18 @@ class EvalSpec(BaseModel):
782
785
  def read_sandbox_spec(
783
786
  cls: Type["EvalSpec"], values: dict[str, Any]
784
787
  ) -> dict[str, Any]:
785
- return migrate_sandbox_spec(values)
788
+ return migrate_values(values)
786
789
 
787
790
 
788
def migrate_values(values: dict[str, Any]) -> dict[str, Any]:
    """Upgrade raw field values written in older log formats.

    Two migrations are applied in place:

    - A ``sandbox`` entry stored as a list is converted into a
      ``SandboxEnvironmentSpec`` built from its first two items
      (``type`` and ``config``).
    - When ``task_args_passed`` is absent it is filled in from
      ``task_args`` (or an empty dict when that is absent too).

    Args:
        values: Raw field values; mutated in place.

    Returns:
        The same ``values`` dict, after migration.
    """
    sandbox = values.get("sandbox")
    if isinstance(sandbox, list):
        values["sandbox"] = SandboxEnvironmentSpec(
            type=sandbox[0], config=sandbox[1]
        )

    if "task_args_passed" not in values:
        task_args = values.get("task_args", {})
        # Shallow-copy so the two entries are distinct dict objects; sharing
        # one object would make a mutation of either show up in both.
        values["task_args_passed"] = (
            dict(task_args) if isinstance(task_args, dict) else task_args
        )

    return values
796
801
 
797
802
 
@@ -133,6 +133,7 @@ class EvalRecorder(FileRecorder):
133
133
  results: EvalResults | None,
134
134
  reductions: list[EvalSampleReductions] | None,
135
135
  error: EvalError | None = None,
136
+ header_only: bool = False,
136
137
  ) -> EvalLog:
137
138
  # get the key and log
138
139
  key = self._log_file_key(eval)
@@ -174,7 +175,7 @@ class EvalRecorder(FileRecorder):
174
175
 
175
176
  # flush and write the results
176
177
  await log.flush()
177
- return await log.close()
178
+ return await log.close(header_only)
178
179
 
179
180
  @classmethod
180
181
  @override
@@ -321,12 +322,12 @@ class ZipLogFile:
321
322
  # re-open zip file w/ self.temp_file pointer at end
322
323
  self._open()
323
324
 
324
- async def close(self) -> EvalLog:
325
+ async def close(self, header_only: bool) -> EvalLog:
325
326
  async with self._lock:
326
327
  # read the log from the temp file then close it
327
328
  try:
328
329
  self._temp_file.seek(0)
329
- return _read_log(self._temp_file, self._file)
330
+ return _read_log(self._temp_file, self._file, header_only=header_only)
330
331
  finally:
331
332
  self._temp_file.close()
332
333
  if self._zip:
@@ -3,6 +3,7 @@ from typing import Any, Literal, get_args
3
3
 
4
4
  import ijson # type: ignore
5
5
  from ijson import IncompleteJSONError
6
+ from ijson.backends.python import UnexpectedSymbol # type: ignore
6
7
  from pydantic import BaseModel
7
8
  from pydantic_core import from_json
8
9
  from typing_extensions import override
@@ -96,6 +97,7 @@ class JSONRecorder(FileRecorder):
96
97
  results: EvalResults | None,
97
98
  reductions: list[EvalSampleReductions] | None,
98
99
  error: EvalError | None = None,
100
+ header_only: bool = False,
99
101
  ) -> EvalLog:
100
102
  log = self.data[self._log_file_key(spec)]
101
103
  log.data.status = status
@@ -128,12 +130,13 @@ class JSONRecorder(FileRecorder):
128
130
  # The Python JSON serializer supports NaN and Inf, however
129
131
  # this isn't technically part of the JSON spec. The json-stream
130
132
  # library shares this limitation, so if we fail with an
131
- # invalid character then we move on and and parse w/ pydantic
133
+ # invalid character (or Unexpected symbol) then we move on and parse w/ pydantic
132
134
  # (which does support NaN and Inf by default)
133
- except (ValueError, IncompleteJSONError) as ex:
135
+ except (ValueError, IncompleteJSONError, UnexpectedSymbol) as ex:
134
136
  if (
135
137
  str(ex).find("Invalid JSON character") != -1
136
138
  or str(ex).find("invalid char in json text") != -1
139
+ or str(ex).find("Unexpected symbol") != -1
137
140
  ):
138
141
  pass
139
142
  else:
@@ -46,6 +46,7 @@ class Recorder(abc.ABC):
46
46
  results: EvalResults | None,
47
47
  reductions: list[EvalSampleReductions] | None,
48
48
  error: EvalError | None = None,
49
+ header_only: bool = False,
49
50
  ) -> EvalLog: ...
50
51
 
51
52
  @classmethod
inspect_ai/log/_util.py CHANGED
@@ -4,6 +4,7 @@ from typing import Any
4
4
 
5
5
  from inspect_ai._util.content import (
6
6
  ContentAudio,
7
+ ContentData,
7
8
  ContentImage,
8
9
  ContentReasoning,
9
10
  ContentText,
@@ -24,6 +25,7 @@ def text_input_only(inputs: str | list[ChatMessage]) -> str | list[ChatMessage]:
24
25
  | ContentImage
25
26
  | ContentAudio
26
27
  | ContentVideo
28
+ | ContentData
27
29
  ] = []
28
30
  for content in message.content:
29
31
  if content.type == "text":