PyPI - inspect-ai - Versions diffs - 0.3.87__py3-none-any.whl → 0.3.89__py3-none-any.whl - Mend

inspect-ai 0.3.87__py3-none-any.whl → 0.3.89__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (88) hide show

inspect_ai/_cli/eval.py +16 -0
inspect_ai/_cli/score.py +1 -12
inspect_ai/_cli/util.py +4 -2
inspect_ai/_display/core/footer.py +2 -2
inspect_ai/_display/plain/display.py +2 -2
inspect_ai/_eval/context.py +7 -1
inspect_ai/_eval/eval.py +51 -27
inspect_ai/_eval/evalset.py +27 -10
inspect_ai/_eval/loader.py +7 -8
inspect_ai/_eval/run.py +23 -31
inspect_ai/_eval/score.py +18 -1
inspect_ai/_eval/task/log.py +5 -13
inspect_ai/_eval/task/resolved.py +1 -0
inspect_ai/_eval/task/run.py +231 -244
inspect_ai/_eval/task/task.py +25 -2
inspect_ai/_eval/task/util.py +1 -8
inspect_ai/_util/constants.py +1 -0
inspect_ai/_util/json.py +8 -3
inspect_ai/_util/registry.py +30 -13
inspect_ai/_view/www/App.css +5 -0
inspect_ai/_view/www/dist/assets/index.css +55 -18
inspect_ai/_view/www/dist/assets/index.js +550 -458
inspect_ai/_view/www/log-schema.json +84 -1
inspect_ai/_view/www/src/metadata/MetaDataView.module.css +1 -1
inspect_ai/_view/www/src/metadata/MetaDataView.tsx +13 -8
inspect_ai/_view/www/src/metadata/RenderedContent.tsx +3 -0
inspect_ai/_view/www/src/plan/ModelCard.module.css +16 -0
inspect_ai/_view/www/src/plan/ModelCard.tsx +93 -0
inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +5 -1
inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +3 -3
inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +6 -29
inspect_ai/_view/www/src/types/log.d.ts +150 -129
inspect_ai/_view/www/src/workspace/navbar/ModelRolesView.module.css +16 -0
inspect_ai/_view/www/src/workspace/navbar/ModelRolesView.tsx +43 -0
inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -1
inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +5 -0
inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +2 -0
inspect_ai/agent/_agent.py +12 -0
inspect_ai/agent/_as_tool.py +1 -1
inspect_ai/agent/_bridge/bridge.py +9 -2
inspect_ai/agent/_react.py +142 -74
inspect_ai/agent/_run.py +13 -2
inspect_ai/agent/_types.py +6 -0
inspect_ai/approval/_apply.py +6 -9
inspect_ai/approval/_approver.py +3 -3
inspect_ai/approval/_auto.py +2 -2
inspect_ai/approval/_call.py +20 -4
inspect_ai/approval/_human/approver.py +3 -3
inspect_ai/approval/_human/manager.py +2 -2
inspect_ai/approval/_human/panel.py +3 -3
inspect_ai/approval/_policy.py +3 -3
inspect_ai/log/__init__.py +2 -0
inspect_ai/log/_log.py +23 -2
inspect_ai/log/_model.py +58 -0
inspect_ai/log/_recorders/file.py +14 -3
inspect_ai/log/_transcript.py +3 -0
inspect_ai/model/__init__.py +2 -0
inspect_ai/model/_call_tools.py +15 -2
inspect_ai/model/_model.py +49 -3
inspect_ai/model/_openai.py +151 -21
inspect_ai/model/_providers/anthropic.py +25 -14
inspect_ai/model/_providers/bedrock.py +3 -3
inspect_ai/model/_providers/cloudflare.py +29 -108
inspect_ai/model/_providers/google.py +21 -10
inspect_ai/model/_providers/grok.py +23 -17
inspect_ai/model/_providers/groq.py +61 -37
inspect_ai/model/_providers/llama_cpp_python.py +8 -9
inspect_ai/model/_providers/mistral.py +8 -3
inspect_ai/model/_providers/ollama.py +8 -9
inspect_ai/model/_providers/openai.py +53 -157
inspect_ai/model/_providers/openai_compatible.py +195 -0
inspect_ai/model/_providers/openrouter.py +4 -15
inspect_ai/model/_providers/providers.py +11 -0
inspect_ai/model/_providers/together.py +25 -23
inspect_ai/model/_trim.py +83 -0
inspect_ai/solver/_plan.py +5 -3
inspect_ai/tool/_tool_call.py +3 -0
inspect_ai/tool/_tool_def.py +8 -2
inspect_ai/util/__init__.py +3 -0
inspect_ai/util/_concurrency.py +15 -2
{inspect_ai-0.3.87.dist-info → inspect_ai-0.3.89.dist-info}/METADATA +1 -1
{inspect_ai-0.3.87.dist-info → inspect_ai-0.3.89.dist-info}/RECORD +86 -81
inspect_ai/_eval/task/rundir.py +0 -78
inspect_ai/_view/www/node_modules/flatted/python/flatted.py +0 -149
{inspect_ai-0.3.87.dist-info → inspect_ai-0.3.89.dist-info}/WHEEL +0 -0
{inspect_ai-0.3.87.dist-info → inspect_ai-0.3.89.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.87.dist-info → inspect_ai-0.3.89.dist-info}/licenses/LICENSE +0 -0
{inspect_ai-0.3.87.dist-info → inspect_ai-0.3.89.dist-info}/top_level.txt +0 -0

inspect_ai/_view/www/src/types/log.d.ts CHANGED Viewed

@@ -24,12 +24,57 @@ export type SampleIds = (number | string)[] | null;
 export type Shuffled = boolean | null;
 export type Type = string;
 export type Model = string;
+export type MaxRetries = number | null;
+export type Timeout = number | null;
+export type MaxConnections = number | null;
+export type SystemMessage = string | null;
+export type MaxTokens = number | null;
+export type TopP = number | null;
+export type Temperature = number | null;
+export type StopSeqs = string[] | null;
+export type BestOf = number | null;
+export type FrequencyPenalty = number | null;
+export type PresencePenalty = number | null;
+export type LogitBias = {
+  [k: string]: number;
+} | null;
+export type Seed = number | null;
+export type TopK = number | null;
+export type NumChoices = number | null;
+export type Logprobs = boolean | null;
+export type TopLogprobs = number | null;
+export type ParallelToolCalls = boolean | null;
+export type InternalTools = boolean | null;
+export type MaxToolOutput = number | null;
+export type CachePrompt = "auto" | boolean | null;
+export type ReasoningEffort = ("low" | "medium" | "high") | null;
+export type ReasoningTokens = number | null;
+export type ReasoningHistory = ("none" | "all" | "last" | "auto") | null;
+export type Name1 = string;
+export type Type1 =
+  | ("string" | "integer" | "number" | "boolean" | "array" | "object" | "null")
+  | null;
+export type Description = string | null;
+export type Enum = unknown[] | null;
+export type Properties = {
+  [k: string]: JSONSchema;
+} | null;
+export type Additionalproperties = JSONSchema | boolean | null;
+export type Anyof = JSONSchema[] | null;
+export type Required = string[] | null;
+export type Description1 = string | null;
+export type Strict = boolean | null;
 export type ModelBaseUrl = string | null;
+export type ModelRoles = {
+  [k: string]: EvalModelConfig;
+} | null;
+export type Model1 = string;
+export type BaseUrl = string | null;
 export type Limit = number | [unknown, unknown] | null;
 export type SampleId = string | number | (string | number)[] | null;
 export type Epochs = number | null;
 export type EpochsReducer = string[] | null;
-export type Name1 = string;
+export type Name2 = string;
 export type Tools = string | string[];
 export type Approvers = ApproverPolicyConfig[];
 export type FailOnError = boolean | number | null;
@@ -47,12 +92,12 @@ export type LogImages = boolean | null;
 export type LogBuffer = number | null;
 export type LogShared = number | null;
 export type ScoreDisplay = boolean | null;
-export type Type1 = "git";
+export type Type2 = "git";
 export type Origin = string;
 export type Commit = string;
 export type Metadata = {} | null;
 export type Scorers = EvalScorer[] | null;
-export type Name2 = string;
+export type Name3 = string;
 export type Options = {} | null;
 export type Metrics =
   | (
@@ -65,7 +110,7 @@ export type Metrics =
       [k: string]: EvalMetricDefinition[];
     }
   | null;
-export type Name3 = string;
+export type Name4 = string;
 export type Options1 = {} | null;
 export type Metadata1 = {} | null;
 export type Metrics1 =
@@ -74,49 +119,9 @@ export type Metrics1 =
       [k: string]: EvalMetricDefinition[];
     }
   | null;
-export type Name4 = string;
+export type Name5 = string;
 export type Solver1 = string;
 export type Steps = EvalPlanStep[];
-export type MaxRetries = number | null;
-export type Timeout = number | null;
-export type MaxConnections = number | null;
-export type SystemMessage = string | null;
-export type MaxTokens = number | null;
-export type TopP = number | null;
-export type Temperature = number | null;
-export type StopSeqs = string[] | null;
-export type BestOf = number | null;
-export type FrequencyPenalty = number | null;
-export type PresencePenalty = number | null;
-export type LogitBias = {
-  [k: string]: number;
-} | null;
-export type Seed = number | null;
-export type TopK = number | null;
-export type NumChoices = number | null;
-export type Logprobs = boolean | null;
-export type TopLogprobs = number | null;
-export type ParallelToolCalls = boolean | null;
-export type InternalTools = boolean | null;
-export type MaxToolOutput = number | null;
-export type CachePrompt = "auto" | boolean | null;
-export type ReasoningEffort = ("low" | "medium" | "high") | null;
-export type ReasoningTokens = number | null;
-export type ReasoningHistory = ("none" | "all" | "last" | "auto") | null;
-export type Name5 = string;
-export type Type2 =
-  | ("string" | "integer" | "number" | "boolean" | "array" | "object" | "null")
-  | null;
-export type Description = string | null;
-export type Enum = unknown[] | null;
-export type Properties = {
-  [k: string]: JSONSchema;
-} | null;
-export type Additionalproperties = JSONSchema | boolean | null;
-export type Anyof = JSONSchema[] | null;
-export type Required = string[] | null;
-export type Description1 = string | null;
-export type Strict = boolean | null;
 export type TotalSamples = number;
 export type CompletedSamples = number;
 export type Name6 = string;
@@ -210,7 +215,8 @@ export type ParseError = string | null;
 export type Title = string | null;
 export type Format2 = "text" | "markdown";
 export type Content3 = string;
-export type Model1 = string | null;
+export type Type8 = string | null;
+export type Model2 = string | null;
 export type Id5 = string | null;
 export type Content4 =
   | string
@@ -225,7 +231,7 @@ export type Source3 = ("input" | "generate") | null;
 export type Role3 = "tool";
 export type ToolCallId1 = string | null;
 export type Function1 = string | null;
-export type Type8 =
+export type Type9 =
   | "parsing"
   | "timeout"
   | "unicode_decode"
@@ -246,7 +252,7 @@ export type Messages = (
   | ChatMessageAssistant
   | ChatMessageTool
 )[];
-export type Model2 = string;
+export type Model3 = string;
 export type StopReason =
   | "stop"
   | "max_tokens"
@@ -305,7 +311,7 @@ export type Timestamp1 = string;
 export type WorkingStart1 = number;
 export type Pending1 = boolean | null;
 export type Event1 = "sample_limit";
-export type Type9 =
+export type Type10 =
   | "message"
   | "time"
   | "working"
@@ -345,7 +351,8 @@ export type Timestamp5 = string;
 export type WorkingStart5 = number;
 export type Pending5 = boolean | null;
 export type Event5 = "model";
-export type Model3 = string;
+export type Model4 = string;
+export type Role4 = string | null;
 export type Input3 = (
   | ChatMessageSystem
   | ChatMessageUser
@@ -354,7 +361,7 @@ export type Input3 = (
 )[];
 export type Name8 = string;
 export type Description2 = string;
-export type Type10 = "object";
+export type Type11 = "object";
 export type Required1 = string[];
 export type Additionalproperties1 = boolean;
 export type Tools1 = ToolInfo[];
@@ -369,7 +376,7 @@ export type Timestamp6 = string;
 export type WorkingStart6 = number;
 export type Pending6 = boolean | null;
 export type Event6 = "tool";
-export type Type11 = "function";
+export type Type12 = "function";
 export type Id7 = string;
 export type Function2 = string;
 export type Result1 =
@@ -447,14 +454,14 @@ export type WorkingStart13 = number;
 export type Pending13 = boolean | null;
 export type Event13 = "step";
 export type Action1 = "begin" | "end";
-export type Type12 = string | null;
+export type Type13 = string | null;
 export type Name11 = string;
 export type Timestamp14 = string;
 export type WorkingStart14 = number;
 export type Pending14 = boolean | null;
 export type Event14 = "subtask";
 export type Name12 = string;
-export type Type13 = string | null;
+export type Type14 = string | null;
 export type Events2 = (
   | SampleInitEvent
   | SampleLimitEvent
@@ -515,7 +522,7 @@ export type Events = (
 export type TotalTime = number | null;
 export type WorkingTime3 = number | null;
 export type Uuid = string | null;
-export type Type14 =
+export type Type15 =
   | "context"
   | "time"
   | "working"
@@ -576,8 +583,10 @@ export interface EvalSpec {
   dataset: EvalDataset;
   sandbox: SandboxEnvironmentSpec | null;
   model: Model;
+  model_generate_config: GenerateConfig;
   model_base_url: ModelBaseUrl;
   model_args: ModelArgs;
+  model_roles: ModelRoles;
   config: EvalConfig;
   revision: EvalRevision | null;
   packages: Packages;
@@ -607,7 +616,73 @@ export interface SandboxEnvironmentSpec {
 export interface Config {
   [k: string]: unknown;
 }
+/**
+ * Model generation options.
+ */
+export interface GenerateConfig {
+  max_retries: MaxRetries;
+  timeout: Timeout;
+  max_connections: MaxConnections;
+  system_message: SystemMessage;
+  max_tokens: MaxTokens;
+  top_p: TopP;
+  temperature: Temperature;
+  stop_seqs: StopSeqs;
+  best_of: BestOf;
+  frequency_penalty: FrequencyPenalty;
+  presence_penalty: PresencePenalty;
+  logit_bias: LogitBias;
+  seed: Seed;
+  top_k: TopK;
+  num_choices: NumChoices;
+  logprobs: Logprobs;
+  top_logprobs: TopLogprobs;
+  parallel_tool_calls: ParallelToolCalls;
+  internal_tools: InternalTools;
+  max_tool_output: MaxToolOutput;
+  cache_prompt: CachePrompt;
+  reasoning_effort: ReasoningEffort;
+  reasoning_tokens: ReasoningTokens;
+  reasoning_history: ReasoningHistory;
+  response_schema: ResponseSchema | null;
+}
+/**
+ * Schema for model response when using Structured Output.
+ */
+export interface ResponseSchema {
+  name: Name1;
+  json_schema: JSONSchema;
+  description: Description1;
+  strict: Strict;
+}
+/**
+ * JSON Schema for type.
+ */
+export interface JSONSchema {
+  type: Type1;
+  description: Description;
+  default: Default;
+  enum: Enum;
+  items: JSONSchema | null;
+  properties: Properties;
+  additionalProperties: Additionalproperties;
+  anyOf: Anyof;
+  required: Required;
+}
+export interface Default {
+  [k: string]: unknown;
+}
 export interface ModelArgs {}
+/**
+ * Model config.
+ */
+export interface EvalModelConfig {
+  model: Model1;
+  config: GenerateConfig;
+  base_url: BaseUrl;
+  args: Args;
+}
+export interface Args {}
 /**
  * Configuration used for evaluation.
  */
@@ -653,7 +728,7 @@ export interface ApprovalPolicyConfig {
  * ```
  */
 export interface ApproverPolicyConfig {
-  name: Name1;
+  name: Name2;
   tools: Tools;
   params: Params;
 }
@@ -662,7 +737,7 @@ export interface Params {}
  * Git revision for evaluation.
  */
 export interface EvalRevision {
-  type: Type1;
+  type: Type2;
   origin: Origin;
   commit: Commit;
 }
@@ -670,23 +745,23 @@ export interface Packages {
   [k: string]: string;
 }
 export interface EvalScorer {
-  name: Name2;
+  name: Name3;
   options: Options;
   metrics: Metrics;
   metadata: Metadata1;
 }
 export interface EvalMetricDefinition {
-  name: Name3;
+  name: Name4;
   options: Options1;
 }
 /**
  * Plan (solvers) used in evaluation.
  */
 export interface EvalPlan {
-  name: Name4;
+  name: Name5;
   steps: Steps;
   finish: EvalPlanStep | null;
-  config: GenerateConfig;
+  config: GenerateConfig1;
 }
 /**
  * Solver step.
@@ -699,7 +774,7 @@ export interface Params1 {}
 /**
  * Model generation options.
  */
-export interface GenerateConfig {
+export interface GenerateConfig1 {
   max_retries: MaxRetries;
   timeout: Timeout;
   max_connections: MaxConnections;
@@ -726,32 +801,6 @@ export interface GenerateConfig {
   reasoning_history: ReasoningHistory;
   response_schema: ResponseSchema | null;
 }
-/**
- * Schema for model response when using Structured Output.
- */
-export interface ResponseSchema {
-  name: Name5;
-  json_schema: JSONSchema;
-  description: Description1;
-  strict: Strict;
-}
-/**
- * JSON Schema for type.
- */
-export interface JSONSchema {
-  type: Type2;
-  description: Description;
-  default: Default;
-  enum: Enum;
-  items: JSONSchema | null;
-  properties: Properties;
-  additionalProperties: Additionalproperties;
-  anyOf: Anyof;
-  required: Required;
-}
-export interface Default {
-  [k: string]: unknown;
-}
 /**
  * Scoring results from evaluation.
  */
@@ -916,7 +965,7 @@ export interface ChatMessageAssistant {
   internal: unknown;
   role: Role2;
   tool_calls: ToolCalls;
-  model: Model1;
+  model: Model2;
 }
 export interface ToolCall {
   id: Id4;
@@ -925,6 +974,7 @@ export interface ToolCall {
   internal: unknown;
   parse_error: ParseError;
   view: ToolCallContent | null;
+  type: Type8;
 }
 export interface Arguments {}
 /**
@@ -949,14 +999,14 @@ export interface ChatMessageTool {
   error: ToolCallError | null;
 }
 export interface ToolCallError {
-  type: Type8;
+  type: Type9;
   message: Message1;
 }
 /**
  * Output from model generation.
  */
 export interface ModelOutput {
-  model: Model2;
+  model: Model3;
   choices: Choices1;
   usage: ModelUsage1 | null;
   time: Time;
@@ -1037,7 +1087,7 @@ export interface SampleLimitEvent {
   working_start: WorkingStart1;
   pending: Pending1;
   event: Event1;
-  type: Type9;
+  type: Type10;
   message: Message2;
   limit: Limit1;
 }
@@ -1100,11 +1150,12 @@ export interface ModelEvent {
   working_start: WorkingStart5;
   pending: Pending5;
   event: Event5;
-  model: Model3;
+  model: Model4;
+  role: Role4;
   input: Input3;
   tools: Tools1;
   tool_choice: ToolChoice;
-  config: GenerateConfig1;
+  config: GenerateConfig;
   output: ModelOutput;
   error: Error1;
   cache: Cache;
@@ -1147,7 +1198,7 @@ export interface ToolInfo {
  * Description of tool parameters object in JSON Schema format.
  */
 export interface ToolParams {
-  type: Type10;
+  type: Type11;
   properties: Properties1;
   required: Required1;
   additionalProperties: Additionalproperties1;
@@ -1158,36 +1209,6 @@ export interface Properties1 {
 export interface ToolFunction {
   name: Name9;
 }
-/**
- * Model generation options.
- */
-export interface GenerateConfig1 {
-  max_retries: MaxRetries;
-  timeout: Timeout;
-  max_connections: MaxConnections;
-  system_message: SystemMessage;
-  max_tokens: MaxTokens;
-  top_p: TopP;
-  temperature: Temperature;
-  stop_seqs: StopSeqs;
-  best_of: BestOf;
-  frequency_penalty: FrequencyPenalty;
-  presence_penalty: PresencePenalty;
-  logit_bias: LogitBias;
-  seed: Seed;
-  top_k: TopK;
-  num_choices: NumChoices;
-  logprobs: Logprobs;
-  top_logprobs: TopLogprobs;
-  parallel_tool_calls: ParallelToolCalls;
-  internal_tools: InternalTools;
-  max_tool_output: MaxToolOutput;
-  cache_prompt: CachePrompt;
-  reasoning_effort: ReasoningEffort;
-  reasoning_tokens: ReasoningTokens;
-  reasoning_history: ReasoningHistory;
-  response_schema: ResponseSchema | null;
-}
 /**
  * Model call (raw request/response data).
  */
@@ -1210,7 +1231,7 @@ export interface ToolEvent {
   working_start: WorkingStart6;
   pending: Pending6;
   event: Event6;
-  type: Type11;
+  type: Type12;
   id: Id7;
   function: Function2;
   arguments: Arguments1;
@@ -1332,7 +1353,7 @@ export interface StepEvent {
   pending: Pending13;
   event: Event13;
   action: Action1;
-  type: Type12;
+  type: Type13;
   name: Name11;
 }
 /**
@@ -1344,7 +1365,7 @@ export interface SubtaskEvent {
   pending: Pending14;
   event: Event14;
   name: Name12;
-  type: Type13;
+  type: Type14;
   input: Input5;
   result: Result2;
   events: Events2;
@@ -1365,7 +1386,7 @@ export interface Attachments {
  * Limit encontered by sample.
  */
 export interface EvalSampleLimit {
-  type: Type14;
+  type: Type15;
   limit: Limit2;
 }
 /**

inspect_ai/_view/www/src/workspace/navbar/ModelRolesView.module.css ADDED Viewed

@@ -0,0 +1,16 @@
+.container {
+  display: flex;
+  flex-direction: row;
+  flex-wrap: wrap;
+  gap: 0;
+  margin-top: -0.2rem;
+  margin-bottom: 0.2rem;
+}
+.grid {
+  display: grid;
+  grid-template-rows: repeat(auto-fill, minmax(10px, 1fr));
+  grid-template-columns: 1fr;
+  gap: 0.1em;
+  padding-right: 1em;
+}

inspect_ai/_view/www/src/workspace/navbar/ModelRolesView.tsx ADDED Viewed

@@ -0,0 +1,43 @@
+import { FC } from "react";
+import { ModelRoles } from "../../types/log";
+import clsx from "clsx";
+import styles from "./ModelRolesView.module.css";
+interface ModelRolesViewProps {
+  roles: ModelRoles;
+}
+/**
+ * Renders the Navbar
+ */
+export const ModelRolesView: FC<ModelRolesViewProps> = ({ roles }) => {
+  roles = roles || {};
+  // Render as a single line if there is only a single
+  // model role
+  const singleLine = Object.keys(roles).length !== 1;
+  // Render a layout of model roles
+  const modelEls = Object.keys(roles).map((key) => {
+    const role = key;
+    const roleData = roles[role];
+    const model = roleData.model;
+    return (
+      <div
+        className={clsx(
+          singleLine ? styles.grid : undefined,
+          "text-style-secondary",
+          "text-size-smallest",
+        )}
+        key={key}
+      >
+        <span className={clsx("text-style-label")}>{role}:</span>
+        <span>{model}</span>
+      </div>
+    );
+  });
+  return modelEls.length > 0 ? (
+    <div className={styles.container}>{modelEls}</div>
+  ) : undefined;
+};

inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css CHANGED Viewed

@@ -46,7 +46,7 @@
 .secondaryContainer {
   opacity: 0.7;
-  margin-top: 0.1rem;
+  margin-top: -0.1rem;
   padding-bottom: 0;
   display: grid;
   grid-template-columns: minmax(0, max-content) max-content;

inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx CHANGED Viewed

@@ -7,6 +7,7 @@ import { kModelNone } from "../../constants";
 import { useStore } from "../../state/store";
 import { EvalResults, EvalSpec, Status } from "../../types/log";
 import { filename } from "../../utils/path";
+import { ModelRolesView } from "./ModelRolesView";
 import styles from "./PrimaryBar.module.css";
 import {
   displayScorersFromRunningMetrics,
@@ -100,6 +101,10 @@ export const PrimaryBar: FC<PrimaryBarProps> = ({
               ""
             )}
           </div>
+          {evalSpec?.model_roles ? (
+            <ModelRolesView roles={evalSpec.model_roles} />
+          ) : undefined}
           <div className={clsx("text-size-small", styles.secondaryContainer)}>
             <div className={clsx("navbar-secondary-text", "text-truncate")}>
               {logFileName}

inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx CHANGED Viewed

@@ -1,6 +1,7 @@
 import { FC } from "react";
 import { SampleSummary } from "../../api/types";
 import { MessageBand } from "../../components/MessageBand";
+import { ModelCard } from "../../plan/ModelCard";
 import { PlanCard } from "../../plan/PlanCard";
 import {
   EvalError,
@@ -55,6 +56,7 @@ export const InfoTab: FC<PlanTabProps> = ({
           evalPlan={evalPlan}
           scores={evalResults?.scores}
         />
+        {evalSpec ? <ModelCard evalSpec={evalSpec} /> : undefined}
         {evalStatus !== "started" ? <UsageCard stats={evalStats} /> : undefined}
         {evalStatus === "error" && evalError ? (
           <TaskErrorCard error={evalError} />

inspect_ai/agent/_agent.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from copy import copy, deepcopy
 from functools import wraps
+from inspect import signature
 from typing import (
     Any,
     Callable,
@@ -7,6 +8,7 @@ from typing import (
     Protocol,
     TypeGuard,
     cast,
+    get_type_hints,
     overload,
     runtime_checkable,
 )
@@ -189,6 +191,16 @@ def agent(
             )
             return agent
+        # If a user's code runs "from __future__ import annotations", all type annotations are stored as strings,
+        # which can break introspection-based mechanisms (like inspecting a function’s signature).
+        # The following two lines resolve these string annotations using the original function's globals,
+        # ensuring that any forward references (e.g., "Agent") are evaluated to their actual types,
+        # and then reassign the original function's signature to the wrapper.
+        agent_wrapper.__annotations__ = get_type_hints(
+            agent_wrapper, agent_type.__globals__
+        )
+        agent_wrapper.__signature__ = signature(agent_type)  # type: ignore[attr-defined]
         # register
         return agent_register(cast(Callable[P, Agent], agent_wrapper), agent_name)

inspect_ai/agent/_as_tool.py CHANGED Viewed

@@ -42,7 +42,7 @@ def as_tool(agent: Agent, description: str | None = None, **agent_kwargs: Any) -
     async def execute(input: str, *args: Any, **kwargs: Any) -> ToolResult:
         # prepare state and call agent
-        state = AgentState(messages=[ChatMessageUser(content=input)])
+        state = AgentState(messages=[ChatMessageUser(content=input, source="input")])
         state = await agent(state, *args, **(agent_kwargs | kwargs))
         # find assistant message to read content from (prefer output)

inspect-ai 0.3.87__py3-none-any.whl → 0.3.89__py3-none-any.whl

inspect-ai 0.3.87__py3-none-any.whl → 0.3.89__py3-none-any.whl