PyPI - inspect-ai - Versions diffs - 0.3.88__py3-none-any.whl → 0.3.90__py3-none-any.whl - Mend

inspect-ai 0.3.88__py3-none-any.whl → 0.3.90__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90) hide show

inspect_ai/_cli/eval.py +16 -0
inspect_ai/_cli/score.py +1 -12
inspect_ai/_cli/util.py +4 -2
inspect_ai/_display/core/footer.py +2 -2
inspect_ai/_display/plain/display.py +2 -2
inspect_ai/_eval/context.py +7 -1
inspect_ai/_eval/eval.py +51 -27
inspect_ai/_eval/evalset.py +27 -10
inspect_ai/_eval/loader.py +7 -8
inspect_ai/_eval/run.py +23 -31
inspect_ai/_eval/score.py +18 -1
inspect_ai/_eval/task/log.py +5 -13
inspect_ai/_eval/task/resolved.py +1 -0
inspect_ai/_eval/task/run.py +231 -256
inspect_ai/_eval/task/task.py +25 -2
inspect_ai/_eval/task/util.py +1 -8
inspect_ai/_util/constants.py +1 -0
inspect_ai/_util/json.py +8 -3
inspect_ai/_util/registry.py +30 -13
inspect_ai/_view/www/App.css +5 -0
inspect_ai/_view/www/dist/assets/index.css +71 -36
inspect_ai/_view/www/dist/assets/index.js +573 -475
inspect_ai/_view/www/log-schema.json +66 -0
inspect_ai/_view/www/src/metadata/MetaDataView.module.css +1 -1
inspect_ai/_view/www/src/metadata/MetaDataView.tsx +13 -8
inspect_ai/_view/www/src/metadata/RenderedContent.tsx +3 -0
inspect_ai/_view/www/src/plan/ModelCard.module.css +16 -0
inspect_ai/_view/www/src/plan/ModelCard.tsx +93 -0
inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -2
inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +2 -2
inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +5 -1
inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +12 -6
inspect_ai/_view/www/src/samples/transcript/TranscriptView.module.css +0 -2
inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +6 -29
inspect_ai/_view/www/src/types/log.d.ts +24 -6
inspect_ai/_view/www/src/workspace/navbar/ModelRolesView.module.css +16 -0
inspect_ai/_view/www/src/workspace/navbar/ModelRolesView.tsx +43 -0
inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -1
inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +5 -0
inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +2 -0
inspect_ai/agent/_agent.py +12 -0
inspect_ai/agent/_as_tool.py +1 -1
inspect_ai/agent/_bridge/bridge.py +9 -2
inspect_ai/agent/_react.py +142 -74
inspect_ai/agent/_run.py +13 -2
inspect_ai/agent/_types.py +6 -0
inspect_ai/approval/_apply.py +6 -7
inspect_ai/approval/_approver.py +3 -3
inspect_ai/approval/_auto.py +2 -2
inspect_ai/approval/_call.py +20 -4
inspect_ai/approval/_human/approver.py +3 -3
inspect_ai/approval/_human/manager.py +2 -2
inspect_ai/approval/_human/panel.py +3 -3
inspect_ai/approval/_policy.py +3 -3
inspect_ai/log/__init__.py +2 -0
inspect_ai/log/_log.py +23 -2
inspect_ai/log/_model.py +58 -0
inspect_ai/log/_recorders/file.py +14 -3
inspect_ai/log/_transcript.py +3 -0
inspect_ai/model/__init__.py +2 -0
inspect_ai/model/_call_tools.py +4 -1
inspect_ai/model/_model.py +49 -3
inspect_ai/model/_openai.py +151 -21
inspect_ai/model/_providers/anthropic.py +20 -12
inspect_ai/model/_providers/bedrock.py +3 -3
inspect_ai/model/_providers/cloudflare.py +29 -108
inspect_ai/model/_providers/google.py +21 -10
inspect_ai/model/_providers/grok.py +23 -17
inspect_ai/model/_providers/groq.py +61 -37
inspect_ai/model/_providers/llama_cpp_python.py +8 -9
inspect_ai/model/_providers/mistral.py +8 -3
inspect_ai/model/_providers/ollama.py +8 -9
inspect_ai/model/_providers/openai.py +53 -157
inspect_ai/model/_providers/openai_compatible.py +195 -0
inspect_ai/model/_providers/openrouter.py +4 -15
inspect_ai/model/_providers/providers.py +11 -0
inspect_ai/model/_providers/together.py +25 -23
inspect_ai/model/_trim.py +83 -0
inspect_ai/solver/_plan.py +5 -3
inspect_ai/tool/_tool_def.py +8 -2
inspect_ai/util/__init__.py +3 -0
inspect_ai/util/_concurrency.py +15 -2
{inspect_ai-0.3.88.dist-info → inspect_ai-0.3.90.dist-info}/METADATA +1 -1
{inspect_ai-0.3.88.dist-info → inspect_ai-0.3.90.dist-info}/RECORD +88 -83
{inspect_ai-0.3.88.dist-info → inspect_ai-0.3.90.dist-info}/WHEEL +1 -1
inspect_ai/_eval/task/rundir.py +0 -78
inspect_ai/_view/www/node_modules/flatted/python/flatted.py +0 -149
{inspect_ai-0.3.88.dist-info → inspect_ai-0.3.90.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.88.dist-info → inspect_ai-0.3.90.dist-info}/licenses/LICENSE +0 -0
{inspect_ai-0.3.88.dist-info → inspect_ai-0.3.90.dist-info}/top_level.txt +0 -0

inspect_ai/_view/www/log-schema.json CHANGED Viewed

@@ -1342,6 +1342,43 @@
       "type": "object",
       "additionalProperties": false
     },
+    "EvalModelConfig": {
+      "description": "Model config.",
+      "properties": {
+        "model": {
+          "title": "Model",
+          "type": "string"
+        },
+        "config": {
+          "$ref": "#/$defs/GenerateConfig"
+        },
+        "base_url": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Base Url"
+        },
+        "args": {
+          "title": "Args",
+          "type": "object"
+        }
+      },
+      "required": [
+        "model",
+        "config",
+        "base_url",
+        "args"
+      ],
+      "title": "EvalModelConfig",
+      "type": "object",
+      "additionalProperties": false
+    },
     "EvalPlan": {
       "description": "Plan (solvers) used in evaluation.",
       "properties": {
@@ -2269,6 +2306,21 @@
           "title": "Model Args",
           "type": "object"
         },
+        "model_roles": {
+          "anyOf": [
+            {
+              "additionalProperties": {
+                "$ref": "#/$defs/EvalModelConfig"
+              },
+              "type": "object"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Model Roles"
+        },
         "config": {
           "$ref": "#/$defs/EvalConfig"
         },
@@ -2361,6 +2413,7 @@
         "model_generate_config",
         "model_base_url",
         "model_args",
+        "model_roles",
         "config",
         "revision",
         "packages",
@@ -3310,6 +3363,18 @@
           "title": "Model",
           "type": "string"
         },
+        "role": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Role"
+        },
         "input": {
           "items": {
             "anyOf": [
@@ -3430,6 +3495,7 @@
         "pending",
         "event",
         "model",
+        "role",
         "input",
         "tools",
         "tool_choice",

inspect_ai/_view/www/src/metadata/MetaDataView.module.css CHANGED Viewed

@@ -9,7 +9,7 @@
 }
 .cell {
-  padding: 0.3em 0.3em 0.3em 0em;
+  padding: 0em 0.5em 0.3em 0em !important;
 }
 .compact .cell {

inspect_ai/_view/www/src/metadata/MetaDataView.tsx CHANGED Viewed

@@ -6,7 +6,7 @@ import { RenderedContent } from "./RenderedContent";
 interface MetadataViewProps {
   id?: string;
   style?: CSSProperties;
-  entries: Record<string, unknown>;
+  entries: Record<string, unknown> | Array<{ name: string; value: unknown }>;
   tableOptions?: string;
   compact?: boolean;
   className?: string | string[];
@@ -66,11 +66,6 @@ export const MetaDataView: FC<MetadataViewProps> = ({
       )}
       style={style}
     >
-      <thead>
-        <tr>
-          <th colSpan={2} className={"th"}></th>
-        </tr>
-      </thead>
       <tbody>{entryEls}</tbody>
     </table>
   );
@@ -80,11 +75,21 @@ export const MetaDataView: FC<MetadataViewProps> = ({
 // or an array of record with name/value on way in
 // but coerce to array of records for order
 const toNameValues = (
-  entries?: Array<{ name: string; value: unknown }> | Record<string, unknown>,
+  entries?:
+    | Array<{ name: string; value: unknown }>
+    | Record<string, unknown>
+    | Array<unknown>,
 ): Array<{ name: string; value: unknown }> | undefined => {
   if (entries) {
     if (Array.isArray(entries)) {
-      return entries;
+      // filter arrays that don't contain the expected name value pairs
+      const filtered = entries.filter((entry) => {
+        if (entry && typeof entry === "object") {
+          return "name" in entry && "value" in entry;
+        }
+        return false;
+      });
+      return filtered as Array<{ name: string; value: unknown }>;
     } else {
       return Object.entries(entries || {}).map(([key, value]) => {
         return { name: key, value };

inspect_ai/_view/www/src/metadata/RenderedContent.tsx CHANGED Viewed

@@ -147,6 +147,9 @@ const contentRenderers: Record<string, ContentRenderer> = {
     canRender: (entry) => {
       const isArray = Array.isArray(entry.value);
       if (isArray) {
+        if (entry.value.length === 0 || entry.value.length === 1) {
+          return true;
+        }
         const types = new Set(
           entry.value
             .filter((e: unknown) => e !== null)

inspect_ai/_view/www/src/plan/ModelCard.module.css ADDED Viewed

@@ -0,0 +1,16 @@
+.container {
+  display: grid;
+  grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
+  row-gap: 2em;
+  column-gap: 1em;
+}
+.modelInfo {
+  display: grid;
+  grid-template-columns: max-content auto;
+  column-gap: 1em;
+}
+.role {
+  grid-column: -1/1;
+}

inspect_ai/_view/www/src/plan/ModelCard.tsx ADDED Viewed

@@ -0,0 +1,93 @@
+import { FC } from "react";
+import { ApplicationIcons } from "../appearance/icons";
+import { Card, CardBody, CardHeader } from "../components/Card";
+import { EvalModelConfig, EvalSpec } from "../types/log";
+import clsx from "clsx";
+import { MetaDataGrid } from "../metadata/MetaDataGrid";
+import styles from "./ModelCard.module.css";
+interface ModelCardProps {
+  evalSpec?: EvalSpec;
+}
+/**
+ * Renders the plan card
+ */
+export const ModelCard: FC<ModelCardProps> = ({ evalSpec }) => {
+  if (!evalSpec) {
+    return undefined;
+  }
+  const modelsInfo: Record<string, EvalModelConfig> = {
+    eval: {
+      model: evalSpec.model,
+      base_url: evalSpec.model_base_url,
+      config: evalSpec.model_generate_config,
+      args: evalSpec.model_args,
+    },
+    ...evalSpec.model_roles,
+  };
+  const noneEl = <span className="text-style-secondary">None</span>;
+  return (
+    <Card>
+      <CardHeader icon={ApplicationIcons.model} label="Models" />
+      <CardBody id={"task-model-card-body"}>
+        <div className={styles.container}>
+          {Object.keys(modelsInfo || {}).map((modelKey) => {
+            const modelInfo = modelsInfo[modelKey];
+            return (
+              <div
+                key={modelKey}
+                className={clsx(styles.modelInfo, "text-size-small")}
+              >
+                <div
+                  className={clsx(
+                    styles.role,
+                    "text-style-label",
+                    "text-style-secondary",
+                  )}
+                >
+                  {modelKey}
+                </div>
+                <div className={clsx("text-style-label")}>Model</div>
+                <div>{modelInfo.model}</div>
+                <div className={clsx("text-style-label")}>Base Url</div>
+                <div className="text-size-small">
+                  {modelInfo.base_url || noneEl}
+                </div>
+                <div className={clsx("text-style-label")}>Configuration</div>
+                <div className="text-size-small">
+                  {modelInfo.config &&
+                  Object.keys(modelInfo.config).length > 0 ? (
+                    <MetaDataGrid
+                      entries={
+                        modelInfo.config as any as Record<string, unknown>
+                      }
+                    />
+                  ) : (
+                    noneEl
+                  )}
+                </div>
+                <div className={clsx("text-style-label")}>Args</div>
+                <div className="text-size-small">
+                  {Object.keys(modelInfo.args).length > 0 ? (
+                    <MetaDataGrid
+                      entries={modelInfo.args as any as Record<string, unknown>}
+                    />
+                  ) : (
+                    noneEl
+                  )}
+                </div>
+              </div>
+            );
+          })}
+        </div>
+      </CardBody>
+    </Card>
+  );
+};

inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx CHANGED Viewed

@@ -26,7 +26,7 @@ export const ChatMessage: FC<ChatMessageProps> = ({
   indented,
   toolCallStyle,
 }) => {
-  const collapse = message.role === "system";
+  const collapse = message.role === "system" || message.role === "user";
   return (
     <div
       className={clsx(
@@ -46,7 +46,7 @@ export const ChatMessage: FC<ChatMessageProps> = ({
           indented ? styles.indented : undefined,
         )}
       >
-        <ExpandablePanel id={`${id}-message`} collapse={collapse} lines={30}>
+        <ExpandablePanel id={`${id}-message`} collapse={collapse} lines={15}>
           <MessageContents
             id={`${id}-contents`}
             key={`${id}-contents`}

inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css CHANGED Viewed

@@ -5,8 +5,8 @@
 }
 .outputCode {
-  overflow-wrap: anywhere;
-  white-space: pre-wrap;
+  overflow-wrap: anywhere !important;
+  white-space: pre-wrap !important;
 }
 .bottomPadding {

inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx CHANGED Viewed

@@ -63,11 +63,15 @@ export const ModelEventView: FC<ModelEventViewProps> = ({
     }
   }
+  const panelTitle = event.role
+    ? `Model Call (${event.role}): ${event.model}`
+    : `Model Call: ${event.model}`;
   return (
     <EventPanel
       id={id}
       className={className}
-      title={formatTitle(`Model Call: ${event.model}`, totalUsage, callTime)}
+      title={formatTitle(panelTitle, totalUsage, callTime)}
       subTitle={formatTiming(event.timestamp, event.working_start)}
       icon={ApplicationIcons.model}
     >

inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx CHANGED Viewed

@@ -132,6 +132,18 @@ const stepDescriptor = (
     return {
       ...rootStepDescriptor,
     };
+  } else if (event.event === "step") {
+    if (event.name === "init") {
+      return {
+        ...rootStepDescriptor,
+        name: "Init",
+        collapse: true,
+      };
+    } else {
+      return {
+        ...rootStepDescriptor,
+      };
+    }
   } else {
     switch (event.name) {
       case "sample_init":
@@ -140,12 +152,6 @@ const stepDescriptor = (
           name: "Sample Init",
           collapse: true,
         };
-      case "init":
-        return {
-          ...rootStepDescriptor,
-          name: "Init",
-          collapse: true,
-        };
       default:
         return {
           endSpace: false,

inspect_ai/_view/www/src/samples/transcript/TranscriptView.module.css CHANGED Viewed

@@ -28,8 +28,6 @@
 .eventNode {
   background-color: var(--bs-body-bg);
-  margin-bottom: 1.5em;
-  padding-bottom: 0.5em;
 }
 .eventNode.darkenBg {

inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx CHANGED Viewed

@@ -276,44 +276,21 @@ function setPath(
   value: unknown,
 ): void {
   const keys = parsePath(path);
-  let current: Record<string, unknown> | unknown[] = target;
+  let current: Record<string, unknown> = target;
   for (let i = 0; i < keys.length - 1; i++) {
     const key = keys[i];
-    if (Array.isArray(current)) {
-      const numericIndex = getIndex(key);
-      current[numericIndex] = isArrayIndex(keys[i + 1]) ? [] : {};
-      current = current[numericIndex] as
-        | Record<string, unknown>
-        | Array<unknown>;
-    } else {
-      if (!(key in current)) {
-        // If the next key is a number, create an array, otherwise an object
-        current[key] = isArrayIndex(keys[i + 1]) ? [] : {};
-      }
-      current = current[key] as Record<string, unknown> | Array<unknown>;
+    if (!(key in current)) {
+      // If the next key is a number, create an array, otherwise an object
+      current[key] = isArrayIndex(keys[i + 1]) ? [] : {};
     }
+    current = current[key] as Record<string, unknown>;
   }
   const lastKey = keys[keys.length - 1];
-  if (Array.isArray(current)) {
-    const numericIndex = getIndex(lastKey);
-    current[numericIndex] = value;
-  } else {
-    current[lastKey] = value;
-  }
+  current[lastKey] = value;
 }
-const getIndex = (key: string): number => {
-  const numericIndex = isArrayIndex(key) ? parseInt(key) : undefined;
-  if (numericIndex === undefined) {
-    throw new Error(`The key ${key} isn't a valid Array index!`);
-  }
-  return numericIndex;
-};
 /**
  * Places structure in an object (without placing values)
  */

inspect_ai/_view/www/src/types/log.d.ts CHANGED Viewed

@@ -65,6 +65,11 @@ export type Required = string[] | null;
 export type Description1 = string | null;
 export type Strict = boolean | null;
 export type ModelBaseUrl = string | null;
+export type ModelRoles = {
+  [k: string]: EvalModelConfig;
+} | null;
+export type Model1 = string;
+export type BaseUrl = string | null;
 export type Limit = number | [unknown, unknown] | null;
 export type SampleId = string | number | (string | number)[] | null;
 export type Epochs = number | null;
@@ -211,7 +216,7 @@ export type Title = string | null;
 export type Format2 = "text" | "markdown";
 export type Content3 = string;
 export type Type8 = string | null;
-export type Model1 = string | null;
+export type Model2 = string | null;
 export type Id5 = string | null;
 export type Content4 =
   | string
@@ -247,7 +252,7 @@ export type Messages = (
   | ChatMessageAssistant
   | ChatMessageTool
 )[];
-export type Model2 = string;
+export type Model3 = string;
 export type StopReason =
   | "stop"
   | "max_tokens"
@@ -346,7 +351,8 @@ export type Timestamp5 = string;
 export type WorkingStart5 = number;
 export type Pending5 = boolean | null;
 export type Event5 = "model";
-export type Model3 = string;
+export type Model4 = string;
+export type Role4 = string | null;
 export type Input3 = (
   | ChatMessageSystem
   | ChatMessageUser
@@ -580,6 +586,7 @@ export interface EvalSpec {
   model_generate_config: GenerateConfig;
   model_base_url: ModelBaseUrl;
   model_args: ModelArgs;
+  model_roles: ModelRoles;
   config: EvalConfig;
   revision: EvalRevision | null;
   packages: Packages;
@@ -666,6 +673,16 @@ export interface Default {
   [k: string]: unknown;
 }
 export interface ModelArgs {}
+/**
+ * Model config.
+ */
+export interface EvalModelConfig {
+  model: Model1;
+  config: GenerateConfig;
+  base_url: BaseUrl;
+  args: Args;
+}
+export interface Args {}
 /**
  * Configuration used for evaluation.
  */
@@ -948,7 +965,7 @@ export interface ChatMessageAssistant {
   internal: unknown;
   role: Role2;
   tool_calls: ToolCalls;
-  model: Model1;
+  model: Model2;
 }
 export interface ToolCall {
   id: Id4;
@@ -989,7 +1006,7 @@ export interface ToolCallError {
  * Output from model generation.
  */
 export interface ModelOutput {
-  model: Model2;
+  model: Model3;
   choices: Choices1;
   usage: ModelUsage1 | null;
   time: Time;
@@ -1133,7 +1150,8 @@ export interface ModelEvent {
   working_start: WorkingStart5;
   pending: Pending5;
   event: Event5;
-  model: Model3;
+  model: Model4;
+  role: Role4;
   input: Input3;
   tools: Tools1;
   tool_choice: ToolChoice;

inspect_ai/_view/www/src/workspace/navbar/ModelRolesView.module.css ADDED Viewed

@@ -0,0 +1,16 @@
+.container {
+  display: flex;
+  flex-direction: row;
+  flex-wrap: wrap;
+  gap: 0;
+  margin-top: -0.2rem;
+  margin-bottom: 0.2rem;
+}
+.grid {
+  display: grid;
+  grid-template-rows: repeat(auto-fill, minmax(10px, 1fr));
+  grid-template-columns: 1fr;
+  gap: 0.1em;
+  padding-right: 1em;
+}

inspect_ai/_view/www/src/workspace/navbar/ModelRolesView.tsx ADDED Viewed

@@ -0,0 +1,43 @@
+import { FC } from "react";
+import { ModelRoles } from "../../types/log";
+import clsx from "clsx";
+import styles from "./ModelRolesView.module.css";
+interface ModelRolesViewProps {
+  roles: ModelRoles;
+}
+/**
+ * Renders the Navbar
+ */
+export const ModelRolesView: FC<ModelRolesViewProps> = ({ roles }) => {
+  roles = roles || {};
+  // Render as a single line if there is only a single
+  // model role
+  const singleLine = Object.keys(roles).length !== 1;
+  // Render a layout of model roles
+  const modelEls = Object.keys(roles).map((key) => {
+    const role = key;
+    const roleData = roles[role];
+    const model = roleData.model;
+    return (
+      <div
+        className={clsx(
+          singleLine ? styles.grid : undefined,
+          "text-style-secondary",
+          "text-size-smallest",
+        )}
+        key={key}
+      >
+        <span className={clsx("text-style-label")}>{role}:</span>
+        <span>{model}</span>
+      </div>
+    );
+  });
+  return modelEls.length > 0 ? (
+    <div className={styles.container}>{modelEls}</div>
+  ) : undefined;
+};

inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css CHANGED Viewed

@@ -46,7 +46,7 @@
 .secondaryContainer {
   opacity: 0.7;
-  margin-top: 0.1rem;
+  margin-top: -0.1rem;
   padding-bottom: 0;
   display: grid;
   grid-template-columns: minmax(0, max-content) max-content;

inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx CHANGED Viewed

@@ -7,6 +7,7 @@ import { kModelNone } from "../../constants";
 import { useStore } from "../../state/store";
 import { EvalResults, EvalSpec, Status } from "../../types/log";
 import { filename } from "../../utils/path";
+import { ModelRolesView } from "./ModelRolesView";
 import styles from "./PrimaryBar.module.css";
 import {
   displayScorersFromRunningMetrics,
@@ -100,6 +101,10 @@ export const PrimaryBar: FC<PrimaryBarProps> = ({
               ""
             )}
           </div>
+          {evalSpec?.model_roles ? (
+            <ModelRolesView roles={evalSpec.model_roles} />
+          ) : undefined}
           <div className={clsx("text-size-small", styles.secondaryContainer)}>
             <div className={clsx("navbar-secondary-text", "text-truncate")}>
               {logFileName}

inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx CHANGED Viewed

@@ -1,6 +1,7 @@
 import { FC } from "react";
 import { SampleSummary } from "../../api/types";
 import { MessageBand } from "../../components/MessageBand";
+import { ModelCard } from "../../plan/ModelCard";
 import { PlanCard } from "../../plan/PlanCard";
 import {
   EvalError,
@@ -55,6 +56,7 @@ export const InfoTab: FC<PlanTabProps> = ({
           evalPlan={evalPlan}
           scores={evalResults?.scores}
         />
+        {evalSpec ? <ModelCard evalSpec={evalSpec} /> : undefined}
         {evalStatus !== "started" ? <UsageCard stats={evalStats} /> : undefined}
         {evalStatus === "error" && evalError ? (
           <TaskErrorCard error={evalError} />

inspect_ai/agent/_agent.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from copy import copy, deepcopy
 from functools import wraps
+from inspect import signature
 from typing import (
     Any,
     Callable,
@@ -7,6 +8,7 @@ from typing import (
     Protocol,
     TypeGuard,
     cast,
+    get_type_hints,
     overload,
     runtime_checkable,
 )
@@ -189,6 +191,16 @@ def agent(
             )
             return agent
+        # If a user's code runs "from __future__ import annotations", all type annotations are stored as strings,
+        # which can break introspection-based mechanisms (like inspecting a function’s signature).
+        # The following two lines resolve these string annotations using the original function's globals,
+        # ensuring that any forward references (e.g., "Agent") are evaluated to their actual types,
+        # and then reassign the original function's signature to the wrapper.
+        agent_wrapper.__annotations__ = get_type_hints(
+            agent_wrapper, agent_type.__globals__
+        )
+        agent_wrapper.__signature__ = signature(agent_type)  # type: ignore[attr-defined]
         # register
         return agent_register(cast(Callable[P, Agent], agent_wrapper), agent_name)

inspect_ai/agent/_as_tool.py CHANGED Viewed

@@ -42,7 +42,7 @@ def as_tool(agent: Agent, description: str | None = None, **agent_kwargs: Any) -
     async def execute(input: str, *args: Any, **kwargs: Any) -> ToolResult:
         # prepare state and call agent
-        state = AgentState(messages=[ChatMessageUser(content=input)])
+        state = AgentState(messages=[ChatMessageUser(content=input, source="input")])
         state = await agent(state, *args, **(agent_kwargs | kwargs))
         # find assistant message to read content from (prefer output)

inspect-ai 0.3.88__py3-none-any.whl → 0.3.90__py3-none-any.whl

inspect-ai 0.3.88__py3-none-any.whl → 0.3.90__py3-none-any.whl