PyPI - inspect-ai - Versions diffs - 0.3.55__py3-none-any.whl → 0.3.57__py3-none-any.whl - Mend

inspect-ai 0.3.55__py3-none-any.whl → 0.3.57__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (131) hide show

inspect_ai/__init__.py +1 -0
inspect_ai/_cli/common.py +1 -1
inspect_ai/_cli/trace.py +33 -20
inspect_ai/_display/core/active.py +1 -1
inspect_ai/_display/core/display.py +1 -1
inspect_ai/_display/core/footer.py +1 -1
inspect_ai/_display/core/panel.py +1 -1
inspect_ai/_display/core/progress.py +0 -6
inspect_ai/_display/core/rich.py +1 -1
inspect_ai/_display/rich/display.py +2 -2
inspect_ai/_display/textual/app.py +15 -17
inspect_ai/_display/textual/widgets/clock.py +3 -3
inspect_ai/_display/textual/widgets/samples.py +6 -13
inspect_ai/_eval/context.py +9 -1
inspect_ai/_eval/run.py +16 -11
inspect_ai/_eval/score.py +4 -10
inspect_ai/_eval/task/results.py +5 -4
inspect_ai/_eval/task/run.py +6 -12
inspect_ai/_eval/task/task.py +10 -0
inspect_ai/_util/ansi.py +31 -0
inspect_ai/_util/datetime.py +1 -1
inspect_ai/_util/deprecation.py +1 -1
inspect_ai/_util/format.py +7 -0
inspect_ai/_util/json.py +11 -1
inspect_ai/_util/logger.py +14 -13
inspect_ai/_util/throttle.py +10 -1
inspect_ai/_util/trace.py +79 -47
inspect_ai/_util/transcript.py +37 -4
inspect_ai/_util/vscode.py +51 -0
inspect_ai/_view/notify.py +2 -1
inspect_ai/_view/www/.prettierrc.js +12 -0
inspect_ai/_view/www/App.css +22 -1
inspect_ai/_view/www/dist/assets/index.css +2374 -2
inspect_ai/_view/www/dist/assets/index.js +29752 -24492
inspect_ai/_view/www/log-schema.json +262 -215
inspect_ai/_view/www/package.json +1 -0
inspect_ai/_view/www/src/App.mjs +19 -9
inspect_ai/_view/www/src/Types.mjs +0 -1
inspect_ai/_view/www/src/api/Types.mjs +15 -4
inspect_ai/_view/www/src/api/api-http.mjs +2 -0
inspect_ai/_view/www/src/appearance/Icons.mjs +2 -0
inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +74 -0
inspect_ai/_view/www/src/components/CopyButton.mjs +0 -1
inspect_ai/_view/www/src/components/ExpandablePanel.mjs +2 -2
inspect_ai/_view/www/src/components/FindBand.mjs +5 -4
inspect_ai/_view/www/src/components/HumanBaselineView.mjs +168 -0
inspect_ai/_view/www/src/components/LargeModal.mjs +1 -1
inspect_ai/_view/www/src/components/LightboxCarousel.mjs +217 -0
inspect_ai/_view/www/src/components/MessageContent.mjs +1 -1
inspect_ai/_view/www/src/components/TabSet.mjs +1 -1
inspect_ai/_view/www/src/components/Tools.mjs +28 -5
inspect_ai/_view/www/src/components/VirtualList.mjs +15 -17
inspect_ai/_view/www/src/log/remoteLogFile.mjs +2 -1
inspect_ai/_view/www/src/navbar/Navbar.mjs +44 -32
inspect_ai/_view/www/src/samples/SampleDisplay.mjs +1 -2
inspect_ai/_view/www/src/samples/SampleList.mjs +35 -4
inspect_ai/_view/www/src/samples/SampleScoreView.mjs +13 -2
inspect_ai/_view/www/src/samples/SampleScores.mjs +11 -2
inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +238 -178
inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -2
inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +5 -5
inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +7 -0
inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +3 -3
inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +3 -2
inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +1 -1
inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +1 -0
inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +56 -0
inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +17 -5
inspect_ai/_view/www/src/types/asciicinema-player.d.ts +26 -0
inspect_ai/_view/www/src/types/log.d.ts +28 -20
inspect_ai/_view/www/src/workspace/WorkSpace.mjs +1 -1
inspect_ai/_view/www/yarn.lock +44 -0
inspect_ai/approval/_apply.py +4 -0
inspect_ai/approval/_human/panel.py +5 -8
inspect_ai/dataset/_dataset.py +51 -10
inspect_ai/dataset/_util.py +31 -3
inspect_ai/log/__init__.py +2 -0
inspect_ai/log/_log.py +30 -2
inspect_ai/log/_recorders/eval.py +2 -0
inspect_ai/model/_call_tools.py +31 -7
inspect_ai/model/_chat_message.py +3 -0
inspect_ai/model/_model.py +42 -1
inspect_ai/model/_providers/anthropic.py +4 -0
inspect_ai/model/_providers/google.py +24 -6
inspect_ai/model/_providers/openai.py +17 -3
inspect_ai/model/_providers/openai_o1.py +10 -12
inspect_ai/model/_render.py +9 -2
inspect_ai/scorer/_metric.py +12 -1
inspect_ai/solver/__init__.py +2 -0
inspect_ai/solver/_human_agent/agent.py +83 -0
inspect_ai/solver/_human_agent/commands/__init__.py +36 -0
inspect_ai/solver/_human_agent/commands/clock.py +70 -0
inspect_ai/solver/_human_agent/commands/command.py +59 -0
inspect_ai/solver/_human_agent/commands/instructions.py +74 -0
inspect_ai/solver/_human_agent/commands/note.py +42 -0
inspect_ai/solver/_human_agent/commands/score.py +80 -0
inspect_ai/solver/_human_agent/commands/status.py +62 -0
inspect_ai/solver/_human_agent/commands/submit.py +151 -0
inspect_ai/solver/_human_agent/install.py +222 -0
inspect_ai/solver/_human_agent/panel.py +252 -0
inspect_ai/solver/_human_agent/service.py +45 -0
inspect_ai/solver/_human_agent/state.py +55 -0
inspect_ai/solver/_human_agent/view.py +24 -0
inspect_ai/solver/_task_state.py +28 -2
inspect_ai/tool/_tool.py +10 -2
inspect_ai/tool/_tool_info.py +2 -1
inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py +9 -9
inspect_ai/tool/_tools/_web_browser/_web_browser.py +16 -13
inspect_ai/util/__init__.py +12 -4
inspect_ai/{_util/display.py → util/_display.py} +6 -0
inspect_ai/util/_panel.py +31 -9
inspect_ai/util/_sandbox/__init__.py +0 -3
inspect_ai/util/_sandbox/context.py +5 -1
inspect_ai/util/_sandbox/docker/compose.py +17 -13
inspect_ai/util/_sandbox/docker/docker.py +9 -6
inspect_ai/util/_sandbox/docker/internal.py +1 -1
inspect_ai/util/_sandbox/docker/util.py +3 -2
inspect_ai/util/_sandbox/environment.py +6 -5
inspect_ai/util/_sandbox/local.py +1 -1
inspect_ai/util/_sandbox/self_check.py +18 -18
inspect_ai/util/_sandbox/service.py +22 -7
inspect_ai/util/_store.py +7 -8
inspect_ai/util/_store_model.py +110 -0
inspect_ai/util/_subprocess.py +3 -3
inspect_ai/util/_throttle.py +32 -0
{inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/METADATA +3 -3
{inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/RECORD +131 -108
{inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/WHEEL +1 -1
{inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/LICENSE +0 -0
{inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/top_level.txt +0 -0

inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs CHANGED Viewed

@@ -60,10 +60,11 @@ export const ModelEventView = ({ id, event, style }) => {
   };
   // For any user messages which immediately preceded this model call, including a
-  // panel and display those user messages
+  // panel and display those user messages (exclude tool_call messages as they
+  // are already shown in the tool call above)
   const userMessages = [];
   for (const msg of event.input.slice().reverse()) {
-    if (msg.role === "user") {
+    if (msg.role === "user" && !msg.tool_call_id) {
       userMessages.push(msg);
     } else {
       break;

inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs CHANGED Viewed

@@ -37,7 +37,7 @@ export const ToolEventView = ({ id, event, style, depth }) => {
       functionCall=${functionCall}
       input=${input}
       inputType=${inputType}
-      output=${event.result}
+      output=${event.error?.message || event.result}
       mode="compact"
       view=${event.view}
       />

inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs CHANGED Viewed

@@ -155,6 +155,7 @@ export const RenderedEventNode = ({ id, node, style }) => {
         id=${id}
         event=${node.event}
         style=${style}
+        isStore=${true}
       />`;
     case "subtask":

inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs CHANGED Viewed

@@ -2,6 +2,7 @@
 import { html } from "htm/preact";
 import { ChatView } from "../../../components/ChatView.mjs";
 import { FontSize, TextStyle } from "../../../appearance/Fonts.mjs";
+import { HumanBaselineView } from "../../../components/HumanBaselineView.mjs";
 /**
  * @typedef {Object} Signature
@@ -62,6 +63,58 @@ const add_tools = {
   },
 };
+const humanAgentKey = (key) => {
+  return `HumanAgentState:${key}`;
+};
+const human_baseline_session = {
+  type: "human_baseline_session",
+  signature: {
+    add: ["HumanAgentState:logs"],
+    replace: [],
+    remove: [],
+  },
+  render: (changes, resolvedState) => {
+    // Read the session values
+    const started = resolvedState[humanAgentKey("started_running")];
+    const runtime = resolvedState[humanAgentKey("accumulated_time")];
+    const answer = resolvedState[humanAgentKey("answer")];
+    const completed = !!answer;
+    const running = resolvedState[humanAgentKey("running_state")];
+    const rawSessions = resolvedState[humanAgentKey("logs")];
+    // Tweak the date value
+    const startedDate = started ? new Date(started * 1000) : undefined;
+    // Convert raw sessions into session logs
+    const sessions = {};
+    if (rawSessions) {
+      for (const key of Object.keys(rawSessions)) {
+        const value = rawSessions[key];
+        // this pulls the key apart into
+        // <user>_<timestamp>.<type>
+        const match = key.match(/(.*)_(\d+_\d+)\.(.*)/);
+        if (match) {
+          const user = match[1];
+          const timestamp = match[2];
+          const type = match[3];
+          sessions[timestamp] = sessions[timestamp] || {};
+          sessions[timestamp][type] = value;
+          sessions[timestamp]["user"] = user;
+        }
+      }
+    }
+    return html`<${HumanBaselineView}
+      started=${startedDate}
+      running=${running}
+      completed=${completed}
+      answer=${answer}
+      runtime=${runtime}
+      sessionLogs=${Object.values(sessions)}
+    />`;
+  },
+};
 const renderTools = (changes, resolvedState) => {
   // Find which tools were added in this change
   const toolIndexes = [];
@@ -136,6 +189,9 @@ export const RenderableChangeTypes = [
   add_tools,
 ];
+/** @type {ChangeType[]} */
+export const StoreSpecificRenderableTypes = [human_baseline_session];
 /**
  * @typedef {Object} ToolParameters
  * @property {string} type - The type of the parameters object, typically "object".

inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs CHANGED Viewed

@@ -2,7 +2,10 @@
 import { html } from "htm/preact";
 import { EventPanel } from "../EventPanel.mjs";
-import { RenderableChangeTypes } from "./StateEventRenderers.mjs";
+import {
+  RenderableChangeTypes,
+  StoreSpecificRenderableTypes,
+} from "./StateEventRenderers.mjs";
 import { StateDiffView } from "./StateDiffView.mjs";
 import { formatDateTime } from "../../../utils/Format.mjs";
@@ -12,10 +15,11 @@ import { formatDateTime } from "../../../utils/Format.mjs";
  * @param {Object} props - The properties passed to the component.
  * @param { string  } props.id - The id of this event.
  * @param {import("../../../types/log").StateEvent } props.event - The event object to display.
+ * @param { boolean } props.isStore - Whether this event view is rendering a storage (rather than a state)
  * @param { Object } props.style - The style of this event.
  * @returns {import("preact").JSX.Element} The component.
  */
-export const StateEventView = ({ id, event, style }) => {
+export const StateEventView = ({ id, event, isStore, style }) => {
   const summary = summarizeChanges(event.changes);
   // Synthesize objects for comparison
@@ -32,7 +36,11 @@ export const StateEventView = ({ id, event, style }) => {
   // This clone is important since the state is used by preact as potential values that are rendered
   // and as a result may be decorated with additional properties, etc..., resulting in DOM elements
   // appearing attached to state.
-  const changePreview = generatePreview(event.changes, structuredClone(after));
+  const changePreview = generatePreview(
+    event.changes,
+    structuredClone(after),
+    isStore,
+  );
   if (changePreview) {
     tabs.unshift(
       html`<div name="Summary" style=${{ margin: "1em 0em", width: "100%" }}>
@@ -55,11 +63,15 @@ export const StateEventView = ({ id, event, style }) => {
  *
  * @param {import("../../../types/log").JsonChange[]} changes - The change object containing the value.
  * @param {Object} resolvedState - The change object containing the value.
+ * @param {boolean} isStore - Is this rendering a store event
  * @returns {import("preact").JSX.Element|Object|string|undefined} - The rendered HTML template if the value is an object with content and source, otherwise the value itself.
  */
-const generatePreview = (changes, resolvedState) => {
+const generatePreview = (changes, resolvedState, isStore) => {
   const results = [];
-  for (const changeType of RenderableChangeTypes) {
+  for (const changeType of [
+    ...RenderableChangeTypes,
+    ...(isStore ? StoreSpecificRenderableTypes : []),
+  ]) {
     // Note that we currently only have renderers that depend upon
     // add, remove, replace, but we should likely add
     // move, copy, test

inspect_ai/_view/www/src/types/asciicinema-player.d.ts ADDED Viewed

@@ -0,0 +1,26 @@
+declare module "asciinema-player" {
+  export const create: (
+    src: string | Object,
+    el: HTMLElement,
+    opts: {
+      cols?: number;
+      rows?: number;
+      autoPlay?: boolean;
+      preload?: boolean;
+      loop?: boolean;
+      theme?: string;
+      startAt?: number | string;
+      speed?: number;
+      idleTimeLimit?: number;
+      poster?: string;
+      fit?: string;
+      controls?: boolean;
+      markers?: Array<number> | Array<[number, string]>;
+      pauseOnMarkers?: boolean;
+      terminalFontSize?: string;
+      terminalFontFamily?: string;
+      terminalLineHeight?: string;
+      logger?: Object;
+    },
+  ) => any;
+}

inspect_ai/_view/www/src/types/log.d.ts CHANGED Viewed

@@ -29,6 +29,7 @@ export type SandboxEnvironmentSpec = [unknown] | [unknown, unknown];
 export type Model = string;
 export type ModelBaseUrl = string | null;
 export type Limit = number | [unknown, unknown] | null;
+export type SampleId = string | number | (string | number)[] | null;
 export type Epochs = number | null;
 export type EpochsReducer = string[] | null;
 export type Trace = boolean | null;
@@ -42,10 +43,12 @@ export type TimeLimit = number | null;
 export type MaxSamples = number | null;
 export type MaxTasks = number | null;
 export type MaxSubprocesses = number | null;
+export type MaxSandboxes = number | null;
 export type SandboxCleanup = boolean | null;
 export type LogSamples = boolean | null;
 export type LogImages = boolean | null;
 export type LogBuffer = number | null;
+export type ScoreDisplay = boolean | null;
 export type Type = "git";
 export type Origin = string;
 export type Commit = string;
@@ -76,6 +79,7 @@ export type TopLogprobs = number | null;
 export type ParallelToolCalls = boolean | null;
 export type MaxToolOutput = number | null;
 export type CachePrompt = "auto" | boolean | null;
+export type ReasoningEffort = ("low" | "medium" | "high") | null;
 export type TotalSamples = number;
 export type CompletedSamples = number;
 export type Name3 = string;
@@ -119,6 +123,7 @@ export type Role = "system";
 export type Content1 = string | (ContentText | ContentImage)[];
 export type Source1 = ("input" | "generate") | null;
 export type Role1 = "user";
+export type ToolCallId = string | null;
 export type Content2 = string | (ContentText | ContentImage)[];
 export type Source2 = ("input" | "generate") | null;
 export type Role2 = "assistant";
@@ -133,7 +138,7 @@ export type Content3 = string;
 export type Content4 = string | (ContentText | ContentImage)[];
 export type Source3 = ("input" | "generate") | null;
 export type Role3 = "tool";
-export type ToolCallId = string | null;
+export type ToolCallId1 = string | null;
 export type Function1 = string | null;
 export type Type4 =
   | "parsing"
@@ -241,14 +246,10 @@ export type Name5 = string;
 export type Description = string;
 export type Type6 = "object";
 export type Type7 =
-  | "string"
-  | "integer"
-  | "number"
-  | "boolean"
-  | "array"
-  | "object"
-  | "null";
+  | ("string" | "integer" | "number" | "boolean" | "array" | "object" | "null")
+  | null;
 export type Description1 = string | null;
+export type Enum = unknown[] | null;
 export type Properties1 = {
   [k: string]: ToolParam;
 } | null;
@@ -267,7 +268,13 @@ export type Event5 = "tool";
 export type Type8 = "function";
 export type Id3 = string;
 export type Function2 = string;
-export type Result = string | number | boolean | (ContentText | ContentImage)[];
+export type Result =
+  | string
+  | number
+  | boolean
+  | ContentText
+  | ContentImage
+  | (ContentText | ContentImage)[];
 export type Truncated = [unknown, unknown] | null;
 export type Timestamp6 = string;
 export type Pending6 = boolean | null;
@@ -388,8 +395,8 @@ export type Value2 =
 export type Answer1 = string | null;
 export type Explanation2 = string | null;
 export type Metadata8 = {} | null;
-export type SampleId = string | number | null;
-export type Samples2 = SampleScore[];
+export type SampleId1 = string | number | null;
+export type Samples2 = EvalSampleScore[];
 export type Location1 = string;
 export interface EvalLog {
@@ -438,6 +445,7 @@ export interface EvalDataset {
 export interface ModelArgs {}
 export interface EvalConfig {
   limit: Limit;
+  sample_id: SampleId;
   epochs: Epochs;
   epochs_reducer: EpochsReducer;
   trace: Trace;
@@ -449,10 +457,12 @@ export interface EvalConfig {
   max_samples: MaxSamples;
   max_tasks: MaxTasks;
   max_subprocesses: MaxSubprocesses;
+  max_sandboxes: MaxSandboxes;
   sandbox_cleanup: SandboxCleanup;
   log_samples: LogSamples;
   log_images: LogImages;
   log_buffer: LogBuffer;
+  score_display: ScoreDisplay;
 }
 export interface ApprovalPolicyConfig {
   approvers: Approvers;
@@ -523,6 +533,7 @@ export interface GenerateConfig {
   parallel_tool_calls: ParallelToolCalls;
   max_tool_output: MaxToolOutput;
   cache_prompt: CachePrompt;
+  reasoning_effort: ReasoningEffort;
 }
 export interface EvalResults {
   total_samples: TotalSamples;
@@ -607,6 +618,7 @@ export interface ChatMessageUser {
   content: Content1;
   source: Source1;
   role: Role1;
+  tool_call_id: ToolCallId;
 }
 export interface ChatMessageAssistant {
   content: Content2;
@@ -635,7 +647,7 @@ export interface ChatMessageTool {
   content: Content4;
   source: Source3;
   role: Role3;
-  tool_call_id: ToolCallId;
+  tool_call_id: ToolCallId1;
   function: Function1;
   error: ToolCallError | null;
 }
@@ -825,6 +837,7 @@ export interface ToolParam {
   type: Type7;
   description: Description1;
   default: Default;
+  enum: Enum;
   items: ToolParam | null;
   properties: Properties1;
   additionalProperties: Additionalproperties;
@@ -862,6 +875,7 @@ export interface GenerateConfig1 {
   parallel_tool_calls: ParallelToolCalls;
   max_tool_output: MaxToolOutput;
   cache_prompt: CachePrompt;
+  reasoning_effort: ReasoningEffort;
 }
 /**
  * Model call (raw request/response data).
@@ -1020,16 +1034,10 @@ export interface EvalSampleReductions {
   reducer: Reducer1;
   samples: Samples2;
 }
-/**
- * Score for a Sample
- *
- * Args:
- *    sample_id: (str | int | None) Unique id of a sample
- */
-export interface SampleScore {
+export interface EvalSampleScore {
   value: Value2;
   answer: Answer1;
   explanation: Explanation2;
   metadata: Metadata8;
-  sample_id: SampleId;
+  sample_id: SampleId1;
 }

inspect_ai/_view/www/src/workspace/WorkSpace.mjs CHANGED Viewed

@@ -150,7 +150,7 @@ export const WorkSpace = ({
     // The samples tab
     // Currently only appears when the result is successful
-    if (evalStatus !== "error" && sampleMode !== "none") {
+    if (sampleMode !== "none") {
       resolvedTabs.samples = {
         id: kEvalWorkspaceTabId,
         scrollable: samples.length === 1,

inspect_ai/_view/www/yarn.lock CHANGED Viewed

@@ -131,6 +131,13 @@
   dependencies:
     "@babel/types" "^7.25.2"
+"@babel/runtime@^7.21.0":
+  version "7.26.0"
+  resolved "https://registry.yarnpkg.com/@babel/runtime/-/runtime-7.26.0.tgz#8600c2f595f277c60815256418b85356a65173c1"
+  integrity sha512-FDSOghenHTiToteC/QRlv2q3DhPZ/oOXTBoirfWNx1Cx3TMVcGWQtMMmQcSvb/JjpNeGzx8Pq/b4fKEJuWm1sw==
+  dependencies:
+    regenerator-runtime "^0.14.0"
 "@babel/template@^7.25.0":
   version "7.25.0"
   resolved "https://registry.yarnpkg.com/@babel/template/-/template-7.25.0.tgz#e733dc3134b4fede528c15bc95e89cb98c52592a"
@@ -525,6 +532,14 @@ argparse@^2.0.1:
   resolved "https://registry.yarnpkg.com/argparse/-/argparse-2.0.1.tgz#246f50f3ca78a3240f6c997e8a9bd1eac49e4b38"
   integrity sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==
+asciinema-player@^3.8.1:
+  version "3.8.1"
+  resolved "https://registry.yarnpkg.com/asciinema-player/-/asciinema-player-3.8.1.tgz#d56ccc04a85570559900b2297cf44c2a7453d118"
+  integrity sha512-NkpbFg81Y6iJFpDRndakLCQ0G26XSpvuT3vJTFjMRgHb26lqHgRNY9gun54e5MehZ4fEDNYkMZv+z6MfZ8c2aA==
+  dependencies:
+    "@babel/runtime" "^7.21.0"
+    solid-js "^1.3.0"
 babel-plugin-prismjs@^2.1.0:
   version "2.1.0"
   resolved "https://registry.yarnpkg.com/babel-plugin-prismjs/-/babel-plugin-prismjs-2.1.0.tgz#ade627896106326ad04d6d77fba92877618de571"
@@ -647,6 +662,11 @@ cross-spawn@^7.0.2:
     shebang-command "^2.0.0"
     which "^2.0.1"
+csstype@^3.1.0:
+  version "3.1.3"
+  resolved "https://registry.yarnpkg.com/csstype/-/csstype-3.1.3.tgz#d80ff294d114fb0e6ac500fbf85b60137d7eff81"
+  integrity sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==
 cuint@^0.2.2:
   version "0.2.2"
   resolved "https://registry.yarnpkg.com/cuint/-/cuint-0.2.2.tgz#408086d409550c2631155619e9fa7bcadc3b991b"
@@ -1242,6 +1262,11 @@ queue-microtask@^1.2.2:
   resolved "https://registry.yarnpkg.com/queue-microtask/-/queue-microtask-1.2.3.tgz#4929228bbc724dfac43e0efb058caf7b6cfb6243"
   integrity sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==
+regenerator-runtime@^0.14.0:
+  version "0.14.1"
+  resolved "https://registry.yarnpkg.com/regenerator-runtime/-/regenerator-runtime-0.14.1.tgz#356ade10263f685dda125100cd862c1db895327f"
+  integrity sha512-dYnhHh0nJoMfnkZs6GmmhFknAGRrLznOu5nc9ML+EJxGvrx6H7teuevqVqCuPcPK//3eDrrjQhehXVx9cnkGdw==
 resolve-from@^4.0.0:
   version "4.0.0"
   resolved "https://registry.yarnpkg.com/resolve-from/-/resolve-from-4.0.0.tgz#4abcd852ad32dd7baabfe9b40e00a36db5f392e6"
@@ -1294,6 +1319,16 @@ semver@^6.0.0, semver@^6.3.1:
   resolved "https://registry.yarnpkg.com/semver/-/semver-6.3.1.tgz#556d2ef8689146e46dcea4bfdd095f3434dffcb4"
   integrity sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==
+seroval-plugins@^1.1.0:
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/seroval-plugins/-/seroval-plugins-1.1.1.tgz#1e0c175e13bb4c620d4ce5916fbbb63de70c31f9"
+  integrity sha512-qNSy1+nUj7hsCOon7AO4wdAIo9P0jrzAMp18XhiOzA6/uO5TKtP7ScozVJ8T293oRIvi5wyCHSM4TrJo/c/GJA==
+seroval@^1.1.0:
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/seroval/-/seroval-1.1.1.tgz#7630e0c17a3efa6be43f17ad6bcf9f966a61b443"
+  integrity sha512-rqEO6FZk8mv7Hyv4UCj3FD3b6Waqft605TLfsCe/BiaylRpyyMC0b+uA5TJKawX3KzMrdi3wsLbCaLplrQmBvQ==
 shebang-command@^2.0.0:
   version "2.0.0"
   resolved "https://registry.yarnpkg.com/shebang-command/-/shebang-command-2.0.0.tgz#ccd0af4f8835fbdc265b82461aaf0c36663f34ea"
@@ -1306,6 +1341,15 @@ shebang-regex@^3.0.0:
   resolved "https://registry.yarnpkg.com/shebang-regex/-/shebang-regex-3.0.0.tgz#ae16f1644d873ecad843b0307b143362d4c42172"
   integrity sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==
+solid-js@^1.3.0:
+  version "1.9.3"
+  resolved "https://registry.yarnpkg.com/solid-js/-/solid-js-1.9.3.tgz#078f026fe32f6b9b48e8e0557be150f0c2d610a9"
+  integrity sha512-5ba3taPoZGt9GY3YlsCB24kCg0Lv/rie/HTD4kG6h4daZZz7+yK02xn8Vx8dLYBc9i6Ps5JwAbEiqjmKaLB3Ag==
+  dependencies:
+    csstype "^3.1.0"
+    seroval "^1.1.0"
+    seroval-plugins "^1.1.0"
 source-map-js@^1.2.0:
   version "1.2.0"
   resolved "https://registry.yarnpkg.com/source-map-js/-/source-map-js-1.2.0.tgz#16b809c162517b5b8c3e7dcd315a2a5c2612b2af"

inspect_ai/approval/_apply.py CHANGED Viewed

@@ -75,4 +75,8 @@ def init_tool_approval(approval: list[ApprovalPolicy] | None) -> None:
         _tool_approver.set(None)
+def have_tool_approval() -> bool:
+    return _tool_approver.get(None) is not None
 _tool_approver: ContextVar[Approver | None] = ContextVar("tool_approver", default=None)

inspect_ai/approval/_human/panel.py CHANGED Viewed

@@ -24,8 +24,6 @@ from .util import (
     render_tool_approval,
 )
-PANEL_TITLE = "Approvals"
 async def panel_approval(
     message: str,
@@ -35,7 +33,7 @@ async def panel_approval(
     choices: list[ApprovalDecision],
 ) -> Approval:
     # ensure the approvals panel is shown
-    await input_panel(PANEL_TITLE, ApprovalInputPanel)
+    await input_panel(ApprovalInputPanel)
     # submit to human approval manager (will be picked up by panel)
     approvals = human_approval_manager()
@@ -52,11 +50,10 @@ async def panel_approval(
 class ApprovalInputPanel(InputPanel):
+    DEFAULT_TITLE = "Approval"
     DEFAULT_CSS = """
     ApprovalInputPanel {
-        width: 1fr;
-        height: 1fr;
-        padding: 0 1 1 1;
         layout: grid;
         grid-size: 1 3;
         grid-rows: auto 1fr auto;
@@ -88,7 +85,7 @@ class ApprovalInputPanel(InputPanel):
         self._approvals = human_approval_manager().approval_requests()
         if len(self._approvals) > 0:
             approval_id, approval_request = self._approvals[0]
-            self.title = f"{PANEL_TITLE} ({len(self._approvals):,})"
+            self.title = f"{self.DEFAULT_TITLE} ({len(self._approvals):,})"
             heading.request = approval_request
             content.approval = approval_request.request
             actions.approval_request = approval_id, approval_request
@@ -97,7 +94,7 @@ class ApprovalInputPanel(InputPanel):
                 actions.activate()
             self.visible = True
         else:
-            self.title = PANEL_TITLE
+            self.title = self.DEFAULT_TITLE
             heading.request = None
             content.approval = None
             actions.approval_request = None

inspect_ai/dataset/_dataset.py CHANGED Viewed

@@ -1,16 +1,19 @@
 import abc
 import random
+from dataclasses import dataclass, field
 from typing import (
     TYPE_CHECKING,
     Any,
     Callable,
     Iterator,
     Sequence,
+    Type,
+    TypeVar,
     Union,
     overload,
 )
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, ValidationError
 from typing_extensions import override
 from inspect_ai.model import ChatMessage
@@ -20,6 +23,8 @@ from inspect_ai.util._sandbox.environment import resolve_sandbox_environment
 if TYPE_CHECKING:
     from _typeshed import SupportsRichComparison
+MT = TypeVar("MT", bound=BaseModel)
 class Sample(BaseModel):
     def __init__(
@@ -76,6 +81,20 @@ class Sample(BaseModel):
     metadata: dict[str, Any] | None = Field(default=None)
     """Arbitrary metadata associated with the sample."""
+    def metadata_as(self, metadata_cls: Type[MT]) -> MT:
+        """Metadata as a Pydantic model.
+        Args:
+           metadata_cls: BaseModel derived class.
+        Returns:
+           BaseModel: Instance of metadata_cls.
+        """
+        if self.metadata is None:
+            raise ValueError("Sample does not have metadata")
+        return metadata_as(self.metadata, metadata_cls)
     sandbox: SandboxEnvironmentSpec | None = Field(default=None)
     """Sandbox environment type and optional config file."""
@@ -177,7 +196,8 @@ class Dataset(Sequence[Sample], abc.ABC):
         """
-class FieldSpec(BaseModel):
+@dataclass
+class FieldSpec:
     r"""Specification for mapping data source fields to sample fields.
     Args:
@@ -191,28 +211,28 @@ class FieldSpec(BaseModel):
         setup (str): Optional. Setup script to run for sample .
     """
-    input: str = Field(default="input")
+    input: str = field(default="input")
     """Name of the field containing the sample input."""
-    target: str = Field(default="target")
+    target: str = field(default="target")
     """Name of the field containing the sample target."""
-    choices: str = Field(default="choices")
+    choices: str = field(default="choices")
     """Name of field containing the list of answer choices."""
-    id: str = Field(default="id")
+    id: str = field(default="id")
     """ Unique identifier for the sample."""
-    metadata: list[str] | None = Field(default=None)
+    metadata: list[str] | Type[BaseModel] | None = field(default=None)
     """List of additional field names that should be read as metadata."""
-    sandbox: str = Field(default="sandbox")
+    sandbox: str = field(default="sandbox")
     """Sandbox type along with optional config file."""
-    files: str = Field(default="files")
+    files: str = field(default="files")
     """Files that go along wtih the sample."""
-    setup: str = Field(default="setup")
+    setup: str = field(default="setup")
     """Setup script to run for sample (run within default SandboxEnvironment)."""
@@ -313,3 +333,24 @@ class MemoryDataset(Dataset):
             samples=[sample for sample in self if predicate(sample)],
             shuffled=self.shuffled,
         )
+def metadata_as(metadata: dict[str, Any], metadata_cls: Type[MT]) -> MT:
+    # validate that metadata_cls is frozen
+    if not metadata_cls.model_config.get("frozen", False):
+        raise ValueError(
+            f"Metadata model {metadata_cls.__name__} must have frozen=True"
+        )
+    # filter to only fields in the model
+    model_fields = {
+        k: v
+        for k, v in metadata.items()
+        if k in metadata_cls.__pydantic_fields__.keys()
+    }
+    # parse and return model instance
+    try:
+        return metadata_cls(**model_fields)
+    except ValidationError as ex:
+        raise ValueError(f"Could not parse metadata into {metadata_cls.__name__}: {ex}")

inspect_ai/dataset/_util.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import json
 from typing import Any, Iterable, cast
+from pydantic import ValidationError
 from inspect_ai.model import (
     ChatMessage,
     ChatMessageAssistant,
@@ -33,9 +35,35 @@ def record_to_sample_fn(
             # collect metadata if specified
             metadata: dict[str, Any] | None = None
             if sample_fields.metadata:
-                metadata = {}
-                for name in sample_fields.metadata:
-                    metadata[name] = record.get(name)
+                if isinstance(sample_fields.metadata, list):
+                    metadata = {}
+                    for name in sample_fields.metadata:
+                        metadata[name] = record.get(name)
+                else:
+                    # must be frozen
+                    if not sample_fields.metadata.model_config.get("frozen", False):
+                        raise ValueError(
+                            f"Metadata model {sample_fields.metadata.__name__} must have frozen=True"
+                        )
+                    # filter to only fields in the model
+                    model_fields = record.get("metadata", None)
+                    if isinstance(model_fields, str):
+                        model_fields = json.loads(model_fields)
+                    elif model_fields is None:
+                        model_fields = {
+                            k: v
+                            for k, v in record.items()
+                            if k in sample_fields.metadata.__pydantic_fields__.keys()
+                        }
+                    # parse and return metadata
+                    try:
+                        metadata = sample_fields.metadata(**model_fields).model_dump()
+                    except ValidationError as ex:
+                        raise ValueError(
+                            f"Could not parse metadata into {sample_fields.metadata.__name__}: {ex}"
+                        )
             elif "metadata" in record:
                 metadata_field = record.get("metadata")
                 if isinstance(metadata_field, str):

inspect-ai 0.3.55__py3-none-any.whl → 0.3.57__py3-none-any.whl

inspect-ai 0.3.55__py3-none-any.whl → 0.3.57__py3-none-any.whl