PyPI - inspect-ai - Versions diffs - 0.3.102__py3-none-any.whl → 0.3.104__py3-none-any.whl - Mend

inspect-ai 0.3.102__py3-none-any.whl → 0.3.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (112)

inspect_ai/_cli/common.py +2 -1
inspect_ai/_cli/eval.py +2 -1
inspect_ai/_display/core/active.py +3 -0
inspect_ai/_display/core/config.py +1 -0
inspect_ai/_display/core/panel.py +21 -13
inspect_ai/_display/core/results.py +3 -7
inspect_ai/_display/core/rich.py +3 -5
inspect_ai/_display/log/__init__.py +0 -0
inspect_ai/_display/log/display.py +173 -0
inspect_ai/_display/plain/display.py +2 -2
inspect_ai/_display/rich/display.py +2 -4
inspect_ai/_display/textual/app.py +1 -6
inspect_ai/_display/textual/widgets/task_detail.py +3 -14
inspect_ai/_display/textual/widgets/tasks.py +1 -1
inspect_ai/_eval/eval.py +14 -2
inspect_ai/_eval/evalset.py +3 -2
inspect_ai/_eval/registry.py +6 -1
inspect_ai/_eval/run.py +7 -1
inspect_ai/_eval/task/constants.py +1 -0
inspect_ai/_eval/task/log.py +5 -1
inspect_ai/_eval/task/run.py +1 -1
inspect_ai/_util/citation.py +88 -0
inspect_ai/_util/content.py +24 -2
inspect_ai/_util/json.py +17 -2
inspect_ai/_util/registry.py +19 -4
inspect_ai/_view/schema.py +0 -6
inspect_ai/_view/www/dist/assets/index.css +82 -24
inspect_ai/_view/www/dist/assets/index.js +10124 -9808
inspect_ai/_view/www/log-schema.json +418 -1
inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
inspect_ai/_view/www/node_modules/katex/src/fonts/generate_fonts.py +58 -0
inspect_ai/_view/www/node_modules/katex/src/metrics/extract_tfms.py +114 -0
inspect_ai/_view/www/node_modules/katex/src/metrics/extract_ttfs.py +122 -0
inspect_ai/_view/www/node_modules/katex/src/metrics/format_json.py +28 -0
inspect_ai/_view/www/node_modules/katex/src/metrics/parse_tfm.py +211 -0
inspect_ai/_view/www/package.json +2 -2
inspect_ai/_view/www/src/@types/log.d.ts +140 -39
inspect_ai/_view/www/src/app/content/RecordTree.tsx +13 -0
inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -1
inspect_ai/_view/www/src/app/routing/logNavigation.ts +31 -0
inspect_ai/_view/www/src/app/routing/{navigationHooks.ts → sampleNavigation.ts} +39 -86
inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +1 -1
inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +1 -1
inspect_ai/_view/www/src/app/samples/chat/MessageCitations.module.css +16 -0
inspect_ai/_view/www/src/app/samples/chat/MessageCitations.tsx +63 -0
inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +6 -0
inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +174 -25
inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +21 -3
inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.module.css +7 -0
inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.tsx +111 -0
inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.module.css +10 -0
inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.tsx +14 -0
inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.module.css +19 -0
inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.tsx +49 -0
inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -1
inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +12 -2
inspect_ai/_view/www/src/app/samples/chat/types.ts +4 -0
inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +1 -1
inspect_ai/_view/www/src/app/samples/sampleLimit.ts +2 -2
inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +4 -4
inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +1 -1
inspect_ai/_view/www/src/components/MarkdownDiv.tsx +15 -2
inspect_ai/_view/www/src/tests/README.md +2 -2
inspect_ai/_view/www/src/utils/git.ts +3 -1
inspect_ai/_view/www/src/utils/html.ts +6 -0
inspect_ai/agent/_handoff.py +3 -3
inspect_ai/log/_condense.py +5 -0
inspect_ai/log/_file.py +4 -1
inspect_ai/log/_log.py +9 -4
inspect_ai/log/_recorders/eval.py +4 -3
inspect_ai/log/_recorders/json.py +5 -2
inspect_ai/log/_recorders/recorder.py +1 -0
inspect_ai/log/_util.py +2 -0
inspect_ai/model/__init__.py +14 -0
inspect_ai/model/_call_tools.py +13 -4
inspect_ai/model/_chat_message.py +3 -0
inspect_ai/model/_openai_responses.py +80 -34
inspect_ai/model/_providers/_anthropic_citations.py +158 -0
inspect_ai/model/_providers/_google_citations.py +100 -0
inspect_ai/model/_providers/anthropic.py +196 -34
inspect_ai/model/_providers/google.py +94 -22
inspect_ai/model/_providers/mistral.py +20 -7
inspect_ai/model/_providers/openai.py +11 -10
inspect_ai/model/_providers/openai_compatible.py +3 -2
inspect_ai/model/_providers/openai_responses.py +2 -5
inspect_ai/model/_providers/perplexity.py +123 -0
inspect_ai/model/_providers/providers.py +13 -2
inspect_ai/model/_providers/vertex.py +3 -0
inspect_ai/model/_trim.py +5 -0
inspect_ai/tool/__init__.py +14 -0
inspect_ai/tool/_mcp/_mcp.py +5 -2
inspect_ai/tool/_mcp/sampling.py +19 -3
inspect_ai/tool/_mcp/server.py +1 -1
inspect_ai/tool/_tool.py +10 -1
inspect_ai/tool/_tools/_web_search/_base_http_provider.py +104 -0
inspect_ai/tool/_tools/_web_search/_exa.py +78 -0
inspect_ai/tool/_tools/_web_search/_google.py +22 -25
inspect_ai/tool/_tools/_web_search/_tavily.py +47 -65
inspect_ai/tool/_tools/_web_search/_web_search.py +83 -36
inspect_ai/tool/_tools/_web_search/_web_search_provider.py +7 -0
inspect_ai/util/_display.py +11 -2
inspect_ai/util/_sandbox/docker/compose.py +2 -2
inspect_ai/util/_span.py +12 -1
{inspect_ai-0.3.102.dist-info → inspect_ai-0.3.104.dist-info}/METADATA +2 -2
{inspect_ai-0.3.102.dist-info → inspect_ai-0.3.104.dist-info}/RECORD +112 -88
/inspect_ai/model/{_openai_computer_use.py → _providers/_openai_computer_use.py} +0 -0
/inspect_ai/model/{_openai_web_search.py → _providers/_openai_web_search.py} +0 -0
{inspect_ai-0.3.102.dist-info → inspect_ai-0.3.104.dist-info}/WHEEL +0 -0
{inspect_ai-0.3.102.dist-info → inspect_ai-0.3.104.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.102.dist-info → inspect_ai-0.3.104.dist-info}/licenses/LICENSE +0 -0
{inspect_ai-0.3.102.dist-info → inspect_ai-0.3.104.dist-info}/top_level.txt +0 -0

inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.tsx ADDED Viewed

@@ -0,0 +1,111 @@
+import { FC, ReactNode } from "react";
+import { WebSearch } from "./WebSearch";
+import clsx from "clsx";
+import { ContentData } from "../../../../@types/log";
+import { RecordTree } from "../../../content/RecordTree";
+import styles from "./ContentDataView.module.css";
+import { WebSearchContentData, WebSearchResults } from "./WebSearchResults";
+export interface ContentDataProps {
+  id: string;
+  contentData: ContentData;
+}
+interface RenderableData {
+  type: string;
+  name?: string;
+  [key: string]: any;
+}
+export const ContentDataView: FC<ContentDataProps> = ({ id, contentData }) => {
+  const renderableData = contentData.data as RenderableData;
+  const renderer = contentDataRenderers.find((r) =>
+    r.canRender(renderableData),
+  );
+  if (!renderer) {
+    const { encrypted_content, ...record } = renderableData;
+    return (
+      <div className={clsx(styles.contentData)}>
+        <RecordTree
+          id={`${id}-tree`}
+          record={record}
+          className={clsx(styles.data)}
+          defaultExpandLevel={0}
+        />
+      </div>
+    );
+  }
+  return (
+    <div className={clsx(styles.contentData)}>
+      {renderer.render(renderableData)}
+    </div>
+  );
+};
+// The following handles rendering of the content data based on its type
+// and name, allowing for different renderers to be used for different types of content data.
+interface ContentDataRenderer {
+  name: string;
+  canRender: (data: RenderableData) => boolean;
+  render: (data: RenderableData) => ReactNode;
+}
+const webSearchServerToolRenderer: ContentDataRenderer = {
+  name: "WebSearch",
+  canRender: (data: RenderableData) => {
+    return data.type === "server_tool_use" && data.name === "web_search";
+  },
+  render: (data: RenderableData): ReactNode => {
+    return <WebSearch query={data.input.query} />;
+  },
+};
+const webSearchResultsServerToolRenderer: ContentDataRenderer = {
+  name: "WebSearchResults",
+  canRender: (data: RenderableData) => {
+    return (
+      data.type === "web_search_tool_result" && Array.isArray(data.content)
+    );
+  },
+  render: (data: RenderableData): ReactNode => {
+    const results: WebSearchContentData[] =
+      data.content as WebSearchContentData[];
+    return <WebSearchResults results={results} />;
+  },
+};
+const serverToolRenderer: ContentDataRenderer = {
+  name: "ServerTool",
+  canRender: (data: RenderableData) => data.type === "server_tool_use",
+  render: (data: RenderableData): ReactNode => {
+    return (
+      <>
+        <div
+          className={clsx(
+            "text-style-label",
+            "text-style-secondary",
+            "text-size-smaller",
+          )}
+        >
+          Server Tool
+        </div>
+        <RecordTree
+          id={data.name || "server-tool"}
+          record={data}
+          className={clsx(styles.data)}
+        />
+      </>
+    );
+  },
+};
+export const contentDataRenderers: ContentDataRenderer[] = [
+  webSearchServerToolRenderer,
+  webSearchResultsServerToolRenderer,
+  serverToolRenderer,
+];

inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.module.css ADDED Viewed

@@ -0,0 +1,10 @@
+.webSearch {
+  display: grid;
+  grid-template-columns: max-content 1fr;
+  column-gap: 0.5em;
+  align-items: baseline;
+}
+.query {
+  font-family: var(--bs-font-monospace);
+}

inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.tsx ADDED Viewed

@@ -0,0 +1,14 @@
+import clsx from "clsx";
+import { FC } from "react";
+import styles from "./WebSearch.module.css";
+export const WebSearch: FC<{ query: string }> = ({ query }) => {
+  return (
+    <div className={clsx(styles.webSearch, "text-size-smaller")}>
+      <span className={clsx("text-style-label", "text-style-secondary")}>
+        Web Search:
+      </span>
+      <span className={clsx(styles.query, "text-size-smallest")}>{query}</span>
+    </div>
+  );
+};

inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.module.css ADDED Viewed

@@ -0,0 +1,19 @@
+.webSearch {
+  display: grid;
+  grid-template-columns: max-content 1fr;
+  column-gap: 0.5em;
+  align-items: baseline;
+}
+.query {
+  font-family: var(--bs-font-monospace);
+}
+.result a:hover {
+  text-decoration: underline;
+}
+.result a {
+  opacity: 0.8;
+  text-decoration: none;
+}

inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.tsx ADDED Viewed

@@ -0,0 +1,49 @@
+import clsx from "clsx";
+import { FC } from "react";
+import styles from "./WebSearchResults.module.css";
+export interface WebSearchContentData {
+  title: string;
+  url: string;
+  page_age: string;
+}
+export const WebSearchResults: FC<{ results: WebSearchContentData[] }> = ({
+  results,
+}) => {
+  return (
+    <>
+      <div
+        className={clsx(
+          styles.label,
+          "text-style-label",
+          "text-style-secondary",
+          "text-size-smaller",
+        )}
+      >
+        Results
+      </div>
+      <ol className={clsx(styles.results, "text-size-smaller")}>
+        {results.map((result, index) => (
+          <li
+            key={index}
+            className={clsx(styles.result, "text-style-secondary")}
+          >
+            <a
+              href={result.url}
+              target="_blank"
+              rel="noopener noreferrer"
+              title={
+                result.url +
+                (result.page_age ? `\n(Age: ${result.page_age})` : "")
+              }
+            >
+              {result.title}
+            </a>
+          </li>
+        ))}
+      </ol>
+    </>
+  );
+};

inspect_ai/_view/www/src/app/samples/chat/messages.ts CHANGED Viewed

@@ -4,6 +4,7 @@ import {
   ChatMessageTool,
   ChatMessageUser,
   ContentAudio,
+  ContentData,
   ContentImage,
   ContentReasoning,
   ContentText,
@@ -64,6 +65,7 @@ export const resolveMessages = (messages: Messages) => {
     | ContentAudio
     | ContentVideo
     | ContentReasoning
+    | ContentData
   )[] = [];
   for (const systemMessage of systemMessages) {
     const contents = Array.isArray(systemMessage.content)
@@ -78,6 +80,7 @@ export const resolveMessages = (messages: Messages) => {
     content: systemContent,
     source: "input",
     internal: null,
+    metadata: null,
   };
   // Converge them
@@ -117,19 +120,22 @@ const normalizeContent = (
     | ContentAudio
     | ContentVideo
     | ContentReasoning
+    | ContentData
     | string,
 ):
   | ContentText
   | ContentImage
   | ContentAudio
   | ContentVideo
-  | ContentReasoning => {
+  | ContentReasoning
+  | ContentData => {
   if (typeof content === "string") {
     return {
       type: "text",
       text: content,
       refusal: null,
       internal: null,
+      citations: null,
     };
   } else {
     return content;

inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx CHANGED Viewed

@@ -2,6 +2,7 @@ import clsx from "clsx";
 import { FC, useMemo } from "react";
 import {
   ContentAudio,
+  ContentData,
   ContentImage,
   ContentReasoning,
   ContentText,
@@ -11,6 +12,7 @@ import {
 import { ContentTool } from "../../../../app/types";
 import ExpandablePanel from "../../../../components/ExpandablePanel";
 import { MessageContent } from "../MessageContent";
+import { defaultContext } from "../MessageContents";
 import styles from "./ToolCallView.module.css";
 import { ToolInput } from "./ToolInput";
 import { ToolTitle } from "./ToolTitle";
@@ -31,6 +33,7 @@ interface ToolCallViewProps {
     | ContentVideo
     | ContentTool
     | ContentReasoning
+    | ContentData
     | (
         | ContentText
         | ContentAudio
@@ -38,6 +41,7 @@ interface ToolCallViewProps {
         | ContentVideo
         | ContentTool
         | ContentReasoning
+        | ContentData
       )[];
   mode?: "compact";
 }
@@ -65,7 +69,8 @@ export const ToolCallView: FC<ToolCallViewProps> = ({
       | ContentImage
       | ContentVideo
       | ContentTool
-      | ContentReasoning,
+      | ContentReasoning
+      | ContentData,
   ) {
     if (value && typeof value === "object") {
       if (value.type === "image") {
@@ -105,6 +110,7 @@ export const ToolCallView: FC<ToolCallViewProps> = ({
   });
   const contents = mode !== "compact" ? input : input || functionCall;
+  const context = defaultContext();
   return (
     <div className={clsx(styles.toolCallView)}>
       <div>
@@ -127,7 +133,7 @@ export const ToolCallView: FC<ToolCallViewProps> = ({
           lines={15}
           className={clsx("text-size-small")}
         >
-          <MessageContent contents={normalizedContent} />
+          <MessageContent contents={normalizedContent} context={context} />
         </ExpandablePanel>
       ) : undefined}
     </div>
@@ -148,6 +154,7 @@ const normalizeContent = (
     | ContentVideo
     | ContentTool
     | ContentReasoning
+    | ContentData
     | (
         | ContentText
         | ContentImage
@@ -155,6 +162,7 @@ const normalizeContent = (
         | ContentVideo
         | ContentTool
         | ContentReasoning
+        | ContentData
       )[],
 ): (
   | ContentText
@@ -163,6 +171,7 @@ const normalizeContent = (
   | ContentVideo
   | ContentTool
   | ContentReasoning
+  | ContentData
 )[] => {
   if (Array.isArray(output)) {
     return output;
@@ -176,6 +185,7 @@ const normalizeContent = (
             text: String(output),
             refusal: null,
             internal: null,
+            citations: null,
           },
         ],
       },

inspect_ai/_view/www/src/app/samples/chat/types.ts CHANGED Viewed

@@ -1 +1,5 @@
+import { Citations } from "../../../@types/log";
 export type ChatViewToolCallStyle = "compact" | "complete" | "omit";
+export type Citation = NonNullable<Citations>[number];

inspect_ai/_view/www/src/app/samples/list/SampleList.tsx CHANGED Viewed

@@ -20,7 +20,7 @@ import clsx from "clsx";
 import { useProperty, useSampleDescriptor } from "../../../state/hooks";
 import { useVirtuosoState } from "../../../state/scrolling";
 import { useStore } from "../../../state/store";
-import { useSampleNavigation } from "../../routing/navigationHooks";
+import { useSampleNavigation } from "../../routing/sampleNavigation";
 import { SampleFooter } from "./SampleFooter";
 import { SampleHeader } from "./SampleHeader";
 import styles from "./SampleList.module.css";

inspect_ai/_view/www/src/app/samples/sampleLimit.ts CHANGED Viewed

@@ -1,9 +1,9 @@
-import { Type14 } from "../../@types/log";
+import { Type21 } from "../../@types/log";
 /**
  * Formats a limit message
  */
-export const sampleLimitMessage = (type: Type14): string => {
+export const sampleLimitMessage = (type: Type21): string => {
   switch (type) {
     case "operator":
       return "Sample terminated due to operator limit.";

inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx CHANGED Viewed

@@ -216,7 +216,7 @@ const ToolsConfig: FC<ToolConfigProps> = ({ tools, toolChoice }) => {
       <div className={clsx(styles.toolConfig, "text-size-small")}>
         {toolEls}
       </div>
-      <div className={styles.toolChoice}>
+      <div className={clsx(styles.toolChoice, "text-size-small")}>
         <div className={clsx("text-style-label", "text-style-secondary")}>
           Tool Choice
         </div>

inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx CHANGED Viewed

@@ -1,6 +1,6 @@
 import clsx from "clsx";
 import { FC } from "react";
-import { SampleLimitEvent, Type10 } from "../../../@types/log";
+import { SampleLimitEvent, Type15 } from "../../../@types/log";
 import { ApplicationIcons } from "../../appearance/icons";
 import { EventPanel } from "./event/EventPanel";
 import { EventNode } from "./types";
@@ -17,12 +17,12 @@ export const SampleLimitEventView: FC<SampleLimitEventViewProps> = ({
   eventNode,
   className,
 }) => {
-  const resolve_title = (type: Type10) => {
+  const resolve_title = (type: Type15) => {
     switch (type) {
       case "custom":
         return "Custom Limit Exceeded";
       case "time":
-        return "Time Limit Execeeded";
+        return "Time Limit Exceeded";
       case "message":
         return "Message Limit Exceeded";
       case "token":
@@ -34,7 +34,7 @@ export const SampleLimitEventView: FC<SampleLimitEventViewProps> = ({
     }
   };
-  const resolve_icon = (type: Type10) => {
+  const resolve_icon = (type: Type15) => {
     switch (type) {
       case "custom":
         return ApplicationIcons.limits.custom;

inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx CHANGED Viewed

@@ -16,7 +16,7 @@ import { useScrollTrack, useVirtuosoState } from "../../../../state/scrolling";
 import { useStore } from "../../../../state/store";
 import { flatTree } from "../transform/treeify";
-import { useSampleDetailNavigation } from "../../../routing/navigationHooks";
+import { useSampleDetailNavigation } from "../../../routing/sampleNavigation";
 import { kSandboxSignalName } from "../transform/fixups";
 import { OutlineRow } from "./OutlineRow";
 import styles from "./TranscriptOutline.module.css";

inspect_ai/_view/www/src/components/MarkdownDiv.tsx CHANGED Viewed

@@ -51,8 +51,11 @@ export const MarkdownDiv = forwardRef<HTMLDivElement, MarkdownDivProps>(
     // For `code` tags, reverse the escaping if we can
     const withCode = unescapeCodeHtmlEntities(unescaped);
+    // For `sup` tags, reverse the escaping if we can
+    const withSup = unescapeSupHtmlEntities(withCode);
     // Return the rendered markdown
-    const markup = { __html: withCode };
+    const markup = { __html: withSup };
     return (
       <div
@@ -65,7 +68,7 @@ export const MarkdownDiv = forwardRef<HTMLDivElement, MarkdownDivProps>(
   },
 );
-const kLetterListPattern = /^([a-zA-Z0-9][).]\s.*?)$/gm;
+const kLetterListPattern = /^([a-zA-Z][).]\s.*?)$/gm;
 const kCommonmarkReferenceLinkPattern = /\[([^\]]*)\]: (?!http)(.*)/g;
 const protectBackslashesInLatex = (content: string): string => {
@@ -193,6 +196,16 @@ const unprotectMarkdown = (txt: string): string => {
   return txt;
 };
+function unescapeSupHtmlEntities(str: string): string {
+  // replace &lt;sup&gt; with <sup>
+  if (!str) {
+    return str;
+  }
+  return str
+    .replace(/&lt;sup&gt;/g, "<sup>")
+    .replace(/&lt;\/sup&gt;/g, "</sup>");
+}
 function unescapeCodeHtmlEntities(str: string): string {
   if (!str) return str;

inspect_ai/_view/www/src/tests/README.md CHANGED Viewed

@@ -5,8 +5,8 @@ This directory contains the test files for the application. The test framework i
 ## Directory Structure
 - `tests/`: Root directory for all tests
-  - `__mocks__/`: Mock files for CSS modules and other assets
-  - `setupTests.mjs`: Setup file for Jest tests
+    - `__mocks__/`: Mock files for CSS modules and other assets
+    - `setupTests.mjs`: Setup file for Jest tests
 ## Running Tests

inspect_ai/_view/www/src/utils/git.ts CHANGED Viewed

@@ -2,6 +2,8 @@
  * Generates a GitHub commit URL based on the repository origin URL and the commit hash.
  */
 export const ghCommitUrl = (origin: string, commit: string): string => {
-  const baseUrl = origin.replace(/\.git$/, "");
+  const baseUrl = origin
+    .replace(/\.git$/, "")
+    .replace(/^git@github.com:/, "https://github.com/");
   return `${baseUrl}/commit/${commit}`;
 };

inspect_ai/_view/www/src/utils/html.ts CHANGED Viewed

@@ -4,3 +4,9 @@
 export function escapeSelector(id: string): string {
   return id.replace(/([ #.;,?!+*~'":^$[\]()=>|/\\])/g, "\\$1");
 }
+export const decodeHtmlEntities = (text: string): string => {
+  const parser = new DOMParser();
+  const doc = parser.parseFromString(text, "text/html");
+  return doc.documentElement.textContent || text;
+};

inspect_ai/agent/_handoff.py CHANGED Viewed

@@ -37,9 +37,9 @@ def handoff(
             Use the built-in `last_message` filter to return only the last message
             or alternatively specify a custom `MessageFilter` function.
         tool_name: Alternate tool name (defaults to `transfer_to_{agent_name}`)
-        limits: List of limits to apply to the agent. Should a limit be exceeded,
-            the agent stops and a user message is appended explaining that a limit was
-            exceeded.
+        limits: List of limits to apply to the agent. Limits are scoped to each
+            handoff to the agent. Should a limit be exceeded, the agent stops and a user
+            message is appended explaining that a limit was exceeded.
         **agent_kwargs: Arguments to curry to `Agent` function (arguments provided here
             will not be presented to the model as part of the tool interface).

inspect_ai/log/_condense.py CHANGED Viewed

@@ -9,6 +9,7 @@ from inspect_ai._util.constants import BASE_64_DATA_REMOVED
 from inspect_ai._util.content import (
     Content,
     ContentAudio,
+    ContentData,
     ContentImage,
     ContentReasoning,
     ContentText,
@@ -344,3 +345,7 @@ def walk_content(content: Content, content_fn: Callable[[str], str]) -> Content:
         return content.model_copy(update=dict(video=content_fn(content.video)))
     elif isinstance(content, ContentReasoning):
         return content.model_copy(update=dict(reasoning=content_fn(content.reasoning)))
+    elif isinstance(content, ContentData):
+        return content.model_copy(
+            update=dict(data=walk_json_value(content.data, content_fn))
+        )

inspect_ai/log/_file.py CHANGED Viewed

@@ -198,7 +198,10 @@ def write_log_dir_manifest(
     fs = filesystem(output_dir)
     manifest = f"{output_dir}{fs.sep}{filename}"
     manifest_json = to_json(
-        value=manifest_logs, indent=2, exclude_none=True, fallback=lambda _x: None
+        value=jsonable_python(manifest_logs),
+        indent=2,
+        exclude_none=True,
+        fallback=lambda _x: None,
     )
     with file(manifest, mode="wb", fs_options=fs_options) as f:
         f.write(manifest_json)

inspect_ai/log/_log.py CHANGED Viewed

@@ -422,7 +422,7 @@ class EvalSample(BaseModel):
             # warning will handle this)
             del values["transcript"]
-        return migrate_sandbox_spec(values)
+        return migrate_values(values)
     # allow field model_usage
     model_config = ConfigDict(protected_namespaces=())
@@ -707,7 +707,10 @@ class EvalSpec(BaseModel):
     """Attributes of the @task decorator."""
     task_args: dict[str, Any] = Field(default_factory=dict)
-    """Arguments used for invoking the task."""
+    """Arguments used for invoking the task (including defaults)."""
+    task_args_passed: dict[str, Any] = Field(default_factory=dict)
+    """Arguments explicitly passed by caller for invoking the task."""
     solver: str | None = Field(default=None)
     """Solver name."""
@@ -782,16 +785,18 @@ class EvalSpec(BaseModel):
     def read_sandbox_spec(
         cls: Type["EvalSpec"], values: dict[str, Any]
     ) -> dict[str, Any]:
-        return migrate_sandbox_spec(values)
+        return migrate_values(values)
-def migrate_sandbox_spec(values: dict[str, Any]) -> dict[str, Any]:
+def migrate_values(values: dict[str, Any]) -> dict[str, Any]:
     if "sandbox" in values:
         sandbox = values.get("sandbox")
         if isinstance(sandbox, list):
             values["sandbox"] = SandboxEnvironmentSpec(
                 type=sandbox[0], config=sandbox[1]
             )
+    if "task_args_passed" not in values:
+        values["task_args_passed"] = values.get("task_args", {})
     return values

inspect_ai/log/_recorders/eval.py CHANGED Viewed

@@ -133,6 +133,7 @@ class EvalRecorder(FileRecorder):
         results: EvalResults | None,
         reductions: list[EvalSampleReductions] | None,
         error: EvalError | None = None,
+        header_only: bool = False,
     ) -> EvalLog:
         # get the key and log
         key = self._log_file_key(eval)
@@ -174,7 +175,7 @@ class EvalRecorder(FileRecorder):
         # flush and write the results
         await log.flush()
-        return await log.close()
+        return await log.close(header_only)
     @classmethod
     @override
@@ -321,12 +322,12 @@ class ZipLogFile:
                     # re-open zip file w/ self.temp_file pointer at end
                     self._open()
-    async def close(self) -> EvalLog:
+    async def close(self, header_only: bool) -> EvalLog:
         async with self._lock:
             # read the log from the temp file then close it
             try:
                 self._temp_file.seek(0)
-                return _read_log(self._temp_file, self._file)
+                return _read_log(self._temp_file, self._file, header_only=header_only)
             finally:
                 self._temp_file.close()
                 if self._zip:

inspect_ai/log/_recorders/json.py CHANGED Viewed

@@ -3,6 +3,7 @@ from typing import Any, Literal, get_args
 import ijson  # type: ignore
 from ijson import IncompleteJSONError
+from ijson.backends.python import UnexpectedSymbol  # type: ignore
 from pydantic import BaseModel
 from pydantic_core import from_json
 from typing_extensions import override
@@ -96,6 +97,7 @@ class JSONRecorder(FileRecorder):
         results: EvalResults | None,
         reductions: list[EvalSampleReductions] | None,
         error: EvalError | None = None,
+        header_only: bool = False,
     ) -> EvalLog:
         log = self.data[self._log_file_key(spec)]
         log.data.status = status
@@ -128,12 +130,13 @@ class JSONRecorder(FileRecorder):
             # The Python JSON serializer supports NaN and Inf, however
             # this isn't technically part of the JSON spec. The json-stream
             # library shares this limitation, so if we fail with an
-            # invalid character then we move on and and parse w/ pydantic
+            # invalid character (or Unexpected symbol) then we move on and and parse w/ pydantic
             # (which does support NaN and Inf by default)
-            except (ValueError, IncompleteJSONError) as ex:
+            except (ValueError, IncompleteJSONError, UnexpectedSymbol) as ex:
                 if (
                     str(ex).find("Invalid JSON character") != -1
                     or str(ex).find("invalid char in json text") != -1
+                    or str(ex).find("Unexpected symbol") != -1
                 ):
                     pass
                 else:

inspect_ai/log/_recorders/recorder.py CHANGED Viewed

@@ -46,6 +46,7 @@ class Recorder(abc.ABC):
         results: EvalResults | None,
         reductions: list[EvalSampleReductions] | None,
         error: EvalError | None = None,
+        header_only: bool = False,
     ) -> EvalLog: ...
     @classmethod

inspect_ai/log/_util.py CHANGED Viewed

@@ -4,6 +4,7 @@ from typing import Any
 from inspect_ai._util.content import (
     ContentAudio,
+    ContentData,
     ContentImage,
     ContentReasoning,
     ContentText,
@@ -24,6 +25,7 @@ def text_input_only(inputs: str | list[ChatMessage]) -> str | list[ChatMessage]:
                     | ContentImage
                     | ContentAudio
                     | ContentVideo
+                    | ContentData
                 ] = []
                 for content in message.content:
                     if content.type == "text":

inspect-ai 0.3.102__py3-none-any.whl → 0.3.104__py3-none-any.whl

inspect-ai 0.3.102__py3-none-any.whl → 0.3.104__py3-none-any.whl