PyPI - inspect-ai - Versions diffs - 0.3.63__py3-none-any.whl → 0.3.65__py3-none-any.whl - Mend

inspect-ai 0.3.63__py3-none-any.whl → 0.3.65__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (182)

inspect_ai/_cli/cache.py +8 -7
inspect_ai/_cli/common.py +0 -12
inspect_ai/_cli/eval.py +32 -4
inspect_ai/_cli/info.py +1 -0
inspect_ai/_cli/list.py +1 -1
inspect_ai/_cli/log.py +2 -0
inspect_ai/_cli/sandbox.py +4 -1
inspect_ai/_cli/score.py +181 -32
inspect_ai/_cli/trace.py +2 -0
inspect_ai/_cli/view.py +4 -2
inspect_ai/_display/core/config.py +7 -1
inspect_ai/_display/core/progress.py +1 -1
inspect_ai/_display/textual/app.py +8 -4
inspect_ai/_display/textual/widgets/samples.py +6 -5
inspect_ai/_display/textual/widgets/sandbox.py +6 -0
inspect_ai/_eval/__init__.py +0 -0
inspect_ai/_eval/eval.py +100 -97
inspect_ai/_eval/evalset.py +69 -69
inspect_ai/_eval/loader.py +122 -12
inspect_ai/_eval/registry.py +1 -1
inspect_ai/_eval/run.py +14 -0
inspect_ai/_eval/score.py +125 -36
inspect_ai/_eval/task/log.py +105 -4
inspect_ai/_eval/task/results.py +92 -38
inspect_ai/_eval/task/run.py +6 -2
inspect_ai/_eval/task/sandbox.py +35 -2
inspect_ai/_eval/task/task.py +49 -46
inspect_ai/_util/__init__.py +0 -0
inspect_ai/_util/constants.py +1 -1
inspect_ai/_util/content.py +8 -0
inspect_ai/_util/error.py +2 -0
inspect_ai/_util/file.py +15 -1
inspect_ai/_util/logger.py +4 -2
inspect_ai/_util/registry.py +7 -1
inspect_ai/_view/view.py +1 -2
inspect_ai/_view/www/App.css +8 -3
inspect_ai/_view/www/README.md +1 -1
inspect_ai/_view/www/dist/assets/index.css +66 -38
inspect_ai/_view/www/dist/assets/index.js +525 -523
inspect_ai/_view/www/log-schema.json +86 -73
inspect_ai/_view/www/package.json +1 -1
inspect_ai/_view/www/src/App.tsx +1 -0
inspect_ai/_view/www/src/components/AnsiDisplay.tsx +1 -1
inspect_ai/_view/www/src/components/JsonPanel.tsx +1 -1
inspect_ai/_view/www/src/components/LargeModal.tsx +39 -49
inspect_ai/_view/www/src/components/NavPills.tsx +3 -1
inspect_ai/_view/www/src/components/TabSet.tsx +19 -4
inspect_ai/_view/www/src/logfile/remoteLogFile.ts +0 -1
inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +1 -1
inspect_ai/_view/www/src/metadata/MetaDataView.tsx +1 -1
inspect_ai/_view/www/src/metadata/RenderedContent.tsx +6 -13
inspect_ai/_view/www/src/plan/PlanDetailView.tsx +17 -2
inspect_ai/_view/www/src/plan/SolverDetailView.tsx +1 -1
inspect_ai/_view/www/src/samples/SampleDisplay.tsx +14 -5
inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +4 -2
inspect_ai/_view/www/src/samples/SamplesTools.tsx +16 -24
inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +1 -1
inspect_ai/_view/www/src/samples/chat/ChatView.tsx +1 -0
inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +27 -13
inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +19 -17
inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +12 -10
inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +56 -66
inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +12 -5
inspect_ai/_view/www/src/samples/chat/tools/tool.ts +21 -36
inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +3 -1
inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +27 -25
inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +5 -1
inspect_ai/_view/www/src/samples/scores/SampleScoreView.module.css +13 -13
inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +1 -1
inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +2 -2
inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +9 -5
inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +1 -1
inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +5 -4
inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +1 -0
inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +1 -0
inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +17 -6
inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +14 -19
inspect_ai/_view/www/src/types/log.d.ts +107 -19
inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +7 -1
inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +5 -3
inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +25 -27
inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +12 -11
inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +25 -2
inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +60 -36
inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +4 -0
inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +6 -4
inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +16 -14
inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +9 -19
inspect_ai/_view/www/src/workspace/utils.ts +34 -0
inspect_ai/approval/_approval.py +2 -0
inspect_ai/approval/_approver.py +4 -4
inspect_ai/approval/_auto.py +1 -1
inspect_ai/approval/_human/approver.py +3 -0
inspect_ai/approval/_policy.py +5 -0
inspect_ai/approval/_registry.py +2 -2
inspect_ai/dataset/_dataset.py +36 -45
inspect_ai/dataset/_sources/__init__.py +0 -0
inspect_ai/dataset/_sources/csv.py +13 -13
inspect_ai/dataset/_sources/hf.py +29 -29
inspect_ai/dataset/_sources/json.py +10 -10
inspect_ai/log/__init__.py +2 -0
inspect_ai/log/_convert.py +3 -3
inspect_ai/log/_file.py +24 -9
inspect_ai/log/_log.py +98 -7
inspect_ai/log/_message.py +3 -1
inspect_ai/log/_recorders/file.py +4 -0
inspect_ai/log/_recorders/recorder.py +3 -0
inspect_ai/log/_transcript.py +19 -8
inspect_ai/model/__init__.py +2 -0
inspect_ai/model/_cache.py +39 -21
inspect_ai/model/_call_tools.py +2 -2
inspect_ai/model/_chat_message.py +14 -4
inspect_ai/model/_generate_config.py +1 -1
inspect_ai/model/_model.py +31 -24
inspect_ai/model/_model_output.py +14 -1
inspect_ai/model/_openai.py +10 -18
inspect_ai/model/_providers/google.py +9 -5
inspect_ai/model/_providers/openai.py +5 -9
inspect_ai/model/_providers/openrouter.py +1 -1
inspect_ai/scorer/__init__.py +6 -1
inspect_ai/scorer/_answer.py +1 -1
inspect_ai/scorer/_classification.py +4 -0
inspect_ai/scorer/_match.py +4 -5
inspect_ai/scorer/_metric.py +87 -28
inspect_ai/scorer/_metrics/__init__.py +3 -3
inspect_ai/scorer/_metrics/accuracy.py +8 -10
inspect_ai/scorer/_metrics/mean.py +3 -17
inspect_ai/scorer/_metrics/std.py +111 -30
inspect_ai/scorer/_model.py +12 -12
inspect_ai/scorer/_pattern.py +3 -3
inspect_ai/scorer/_reducer/reducer.py +36 -21
inspect_ai/scorer/_reducer/registry.py +2 -2
inspect_ai/scorer/_reducer/types.py +7 -1
inspect_ai/scorer/_score.py +11 -1
inspect_ai/scorer/_scorer.py +110 -16
inspect_ai/solver/__init__.py +1 -1
inspect_ai/solver/_basic_agent.py +19 -22
inspect_ai/solver/_bridge/__init__.py +0 -3
inspect_ai/solver/_bridge/bridge.py +3 -3
inspect_ai/solver/_chain.py +1 -2
inspect_ai/solver/_critique.py +3 -3
inspect_ai/solver/_fork.py +2 -2
inspect_ai/solver/_human_agent/__init__.py +0 -0
inspect_ai/solver/_human_agent/agent.py +5 -8
inspect_ai/solver/_human_agent/commands/clock.py +14 -10
inspect_ai/solver/_human_agent/commands/note.py +1 -1
inspect_ai/solver/_human_agent/commands/score.py +0 -11
inspect_ai/solver/_multiple_choice.py +15 -18
inspect_ai/solver/_prompt.py +7 -7
inspect_ai/solver/_solver.py +53 -52
inspect_ai/solver/_task_state.py +80 -69
inspect_ai/solver/_use_tools.py +9 -9
inspect_ai/tool/__init__.py +2 -1
inspect_ai/tool/_tool.py +43 -14
inspect_ai/tool/_tool_call.py +6 -2
inspect_ai/tool/_tool_choice.py +3 -1
inspect_ai/tool/_tool_def.py +10 -8
inspect_ai/tool/_tool_params.py +24 -0
inspect_ai/tool/_tool_with.py +7 -7
inspect_ai/tool/_tools/__init__.py +0 -0
inspect_ai/tool/_tools/_computer/_common.py +2 -2
inspect_ai/tool/_tools/_computer/_computer.py +11 -0
inspect_ai/tool/_tools/_execute.py +15 -9
inspect_ai/tool/_tools/_web_browser/_resources/README.md +2 -2
inspect_ai/tool/_tools/_web_browser/_web_browser.py +5 -3
inspect_ai/tool/_tools/_web_search.py +7 -5
inspect_ai/util/_concurrency.py +3 -3
inspect_ai/util/_panel.py +2 -0
inspect_ai/util/_resource.py +12 -12
inspect_ai/util/_sandbox/docker/compose.py +23 -20
inspect_ai/util/_sandbox/docker/config.py +2 -1
inspect_ai/util/_sandbox/docker/docker.py +10 -1
inspect_ai/util/_sandbox/docker/service.py +100 -0
inspect_ai/util/_sandbox/environment.py +99 -96
inspect_ai/util/_subprocess.py +5 -3
inspect_ai/util/_subtask.py +15 -16
{inspect_ai-0.3.63.dist-info → inspect_ai-0.3.65.dist-info}/LICENSE +1 -1
{inspect_ai-0.3.63.dist-info → inspect_ai-0.3.65.dist-info}/METADATA +10 -6
{inspect_ai-0.3.63.dist-info → inspect_ai-0.3.65.dist-info}/RECORD +182 -175
{inspect_ai-0.3.63.dist-info → inspect_ai-0.3.65.dist-info}/WHEEL +0 -0
{inspect_ai-0.3.63.dist-info → inspect_ai-0.3.65.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.63.dist-info → inspect_ai-0.3.65.dist-info}/top_level.txt +0 -0

inspect_ai/_view/www/src/types/log.d.ts CHANGED Viewed

@@ -112,6 +112,7 @@ export type Input =
       | ChatMessageAssistant
       | ChatMessageTool
     )[];
+export type Role = "system";
 export type Content =
   | string
   | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
@@ -127,18 +128,17 @@ export type Type4 = "video";
 export type Video = string;
 export type Format1 = "mp4" | "mpeg" | "mov";
 export type Source = ("input" | "generate") | null;
-export type Role = "system";
+export type Role1 = "user";
 export type Content1 =
   | string
   | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
 export type Source1 = ("input" | "generate") | null;
-export type Role1 = "user";
 export type ToolCallId = string[] | null;
+export type Role2 = "assistant";
 export type Content2 =
   | string
   | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
 export type Source2 = ("input" | "generate") | null;
-export type Role2 = "assistant";
 export type ToolCalls = ToolCall[] | null;
 export type Id1 = string;
 export type Function = string;
@@ -148,11 +148,11 @@ export type Title = string | null;
 export type Format2 = "text" | "markdown";
 export type Content3 = string;
 export type Reasoning = string | null;
+export type Role3 = "tool";
 export type Content4 =
   | string
   | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
 export type Source3 = ("input" | "generate") | null;
-export type Role3 = "tool";
 export type ToolCallId1 = string | null;
 export type Function1 = string | null;
 export type Type6 =
@@ -315,6 +315,7 @@ export type Timestamp8 = string;
 export type Pending8 = boolean | null;
 export type Event8 = "score";
 export type Target2 = string | string[] | null;
+export type Intermediate = boolean;
 export type Timestamp9 = string;
 export type Pending9 = boolean | null;
 export type Event9 = "error";
@@ -339,6 +340,7 @@ export type Lineno = number;
 export type Timestamp11 = string;
 export type Pending11 = boolean | null;
 export type Event11 = "info";
+export type Source4 = string | null;
 export type Timestamp12 = string;
 export type Pending12 = boolean | null;
 export type Event12 = "step";
@@ -424,6 +426,9 @@ export type SampleId1 = string | number | null;
 export type Samples2 = EvalSampleScore[];
 export type Location1 = string;
+/**
+ * Evaluation log.
+ */
 export interface EvalLog {
   version?: Version;
   status?: Status;
@@ -436,6 +441,9 @@ export interface EvalLog {
   reductions?: Reductions;
   location?: Location1;
 }
+/**
+ * Eval target and configuration.
+ */
 export interface EvalSpec {
   run_id: RunId;
   created: Created;
@@ -460,6 +468,9 @@ export interface EvalSpec {
 }
 export interface TaskAttribs {}
 export interface TaskArgs {}
+/**
+ * Dataset used for evaluation.
+ */
 export interface EvalDataset {
   name: Name;
   location: Location;
@@ -468,6 +479,9 @@ export interface EvalDataset {
   shuffled: Shuffled;
 }
 export interface ModelArgs {}
+/**
+ * Configuration used for evaluation.
+ */
 export interface EvalConfig {
   limit: Limit;
   sample_id: SampleId;
@@ -513,6 +527,9 @@ export interface ApproverPolicyConfig {
   params: Params;
 }
 export interface Params {}
+/**
+ * Git revision for evaluation.
+ */
 export interface EvalRevision {
   type: Type;
   origin: Origin;
@@ -521,19 +538,25 @@ export interface EvalRevision {
 export interface Packages {
   [k: string]: string;
 }
+/**
+ * Plan (solvers) used in evaluation.
+ */
 export interface EvalPlan {
   name: Name2;
   steps: Steps;
   finish: EvalPlanStep | null;
   config: GenerateConfig;
 }
+/**
+ * Solver step.
+ */
 export interface EvalPlanStep {
   solver: Solver1;
   params: Params1;
 }
 export interface Params1 {}
 /**
- * Base class for model generation configs.
+ * Model generation options.
  */
 export interface GenerateConfig {
   max_retries: MaxRetries;
@@ -560,12 +583,18 @@ export interface GenerateConfig {
   reasoning_effort: ReasoningEffort;
   reasoning_history: ReasoningHistory;
 }
+/**
+ * Scoring results from evaluation.
+ */
 export interface EvalResults {
   total_samples: TotalSamples;
   completed_samples: CompletedSamples;
   scores: Scores;
   metadata: Metadata3;
 }
+/**
+ * Score for evaluation task.
+ */
 export interface EvalScore {
   name: Name3;
   scorer: Scorer;
@@ -578,13 +607,19 @@ export interface Params2 {}
 export interface Metrics {
   [k: string]: EvalMetric;
 }
+/**
+ * Metric for evaluation score.
+ */
 export interface EvalMetric {
   name: Name4;
   value: Value;
-  options: Options;
+  params: Params3;
   metadata: Metadata1;
 }
-export interface Options {}
+export interface Params3 {}
+/**
+ * Timing and usage statistics.
+ */
 export interface EvalStats {
   started_at: StartedAt;
   completed_at: CompletedAt;
@@ -593,6 +628,9 @@ export interface EvalStats {
 export interface ModelUsage {
   [k: string]: ModelUsage1;
 }
+/**
+ * Token usage for completion.
+ */
 export interface ModelUsage1 {
   input_tokens: InputTokens;
   output_tokens: OutputTokens;
@@ -600,11 +638,17 @@ export interface ModelUsage1 {
   input_tokens_cache_write: InputTokensCacheWrite;
   input_tokens_cache_read: InputTokensCacheRead;
 }
+/**
+ * Eval error details.
+ */
 export interface EvalError {
   message: Message;
   traceback: Traceback;
   traceback_ansi: TracebackAnsi;
 }
+/**
+ * Sample from evaluation task.
+ */
 export interface EvalSample {
   id: Id;
   epoch: Epoch;
@@ -625,40 +669,61 @@ export interface EvalSample {
   attachments: Attachments;
   limit: EvalSampleLimit | null;
 }
+/**
+ * System chat message.
+ */
 export interface ChatMessageSystem {
+  role: Role;
   content: Content;
   source: Source;
-  role: Role;
 }
+/**
+ * Text content.
+ */
 export interface ContentText {
   type: Type1;
   text: Text;
 }
+/**
+ * Image content.
+ */
 export interface ContentImage {
   type: Type2;
   image: Image;
   detail: Detail;
 }
+/**
+ * Audio content.
+ */
 export interface ContentAudio {
   type: Type3;
   audio: Audio;
   format: Format;
 }
+/**
+ * Video content.
+ */
 export interface ContentVideo {
   type: Type4;
   video: Video;
   format: Format1;
 }
+/**
+ * User chat message.
+ */
 export interface ChatMessageUser {
+  role: Role1;
   content: Content1;
   source: Source1;
-  role: Role1;
   tool_call_id: ToolCallId;
 }
+/**
+ * Assistant chat message.
+ */
 export interface ChatMessageAssistant {
+  role: Role2;
   content: Content2;
   source: Source2;
-  role: Role2;
   tool_calls: ToolCalls;
   reasoning: Reasoning;
 }
@@ -679,10 +744,13 @@ export interface ToolCallContent {
   format: Format2;
   content: Content3;
 }
+/**
+ * Tool chat message.
+ */
 export interface ChatMessageTool {
+  role: Role3;
   content: Content4;
   source: Source3;
-  role: Role3;
   tool_call_id: ToolCallId1;
   function: Function1;
   error: ToolCallError | null;
@@ -691,6 +759,9 @@ export interface ToolCallError {
   type: Type6;
   message: Message1;
 }
+/**
+ * Output from model generation.
+ */
 export interface ModelOutput {
   model: Model1;
   choices: Choices1;
@@ -699,6 +770,9 @@ export interface ModelOutput {
   metadata: Metadata4;
   error: Error;
 }
+/**
+ * Choice generated for completion.
+ */
 export interface ChatCompletionChoice {
   message: ChatMessageAssistant;
   stop_reason: StopReason;
@@ -729,12 +803,6 @@ export interface TopLogprob {
 }
 /**
  * Score generated by a scorer.
- *
- * Args:
- *    value (Value): Score value.
- *    answer (str | None): Answer extracted from model output (optional).
- *    explanation (str | None): Explanation of score (optional).
- *    metadata (dict[str,Any]): Additional metadata related to the score.
  */
 export interface Score {
   value: Value1;
@@ -754,6 +822,9 @@ export interface SampleInitEvent {
   sample: Sample;
   state: JsonValue;
 }
+/**
+ * Sample for an evaluation task.
+ */
 export interface Sample {
   input: Input1;
   choices: Choices2;
@@ -888,7 +959,7 @@ export interface ToolFunction {
   name: Name6;
 }
 /**
- * Base class for model generation configs.
+ * Model generation options.
  */
 export interface GenerateConfig1 {
   max_retries: MaxRetries;
@@ -984,7 +1055,10 @@ export interface InputEvent {
   input_ansi: InputAnsi;
 }
 /**
- * Event with sample score.
+ * Event with score.
+ *
+ * Can be the final score for a `Sample`, or can be an intermediate score
+ * resulting from a call to `score`.
  */
 export interface ScoreEvent {
   timestamp: Timestamp8;
@@ -992,6 +1066,7 @@ export interface ScoreEvent {
   event: Event8;
   score: Score;
   target: Target2;
+  intermediate: Intermediate;
 }
 /**
  * Event with sample error.
@@ -1011,6 +1086,9 @@ export interface LoggerEvent {
   event: Event10;
   message: LoggingMessage;
 }
+/**
+ * Message written to Python log.
+ */
 export interface LoggingMessage {
   name: Name7;
   level: Level;
@@ -1027,6 +1105,7 @@ export interface InfoEvent {
   timestamp: Timestamp11;
   pending: Pending11;
   event: Event11;
+  source: Source4;
   data: JsonValue;
 }
 /**
@@ -1063,15 +1142,24 @@ export interface ModelUsage2 {
 export interface Attachments {
   [k: string]: string;
 }
+/**
+ * Limit encontered by sample.
+ */
 export interface EvalSampleLimit {
   type: Type13;
   limit: Limit2;
 }
+/**
+ * Score reductions.
+ */
 export interface EvalSampleReductions {
   scorer: Scorer1;
   reducer: Reducer1;
   samples: Samples2;
 }
+/**
+ * Score and sample_id scored.
+ */
 export interface EvalSampleScore {
   value: Value2;
   answer: Answer1;

inspect_ai/_view/www/src/usage/ModelTokenTable.tsx CHANGED Viewed

@@ -14,7 +14,13 @@ export const ModelTokenTable: React.FC<ModelTokenTable> = ({
       <TokenHeader />
       <tbody>
         {Object.keys(model_usage).map((key) => {
-          return <TokenRow model={key} usage={model_usage[key]} />;
+          return (
+            <TokenRow
+              key={key}
+              model={`${key}-token-row`}
+              usage={model_usage[key]}
+            />
+          );
         })}
       </tbody>
     </TokenTable>

inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx CHANGED Viewed

@@ -68,12 +68,14 @@ export const ModelUsagePanel: React.FC<ModelUsageProps> = ({ usage }) => {
   return (
     <div className={clsx("text-size-small", styles.wrapper)}>
-      {rows.map((row) => {
+      {rows.map((row, idx) => {
         if (row.label === "---") {
-          return <div className={styles.separator}></div>;
+          return (
+            <div key={`$usage-sep-${idx}`} className={styles.separator}></div>
+          );
         } else {
           return (
-            <Fragment>
+            <Fragment key={`$usage-row-${idx}`}>
               <div
                 className={clsx(
                   "text-style-label",

inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx CHANGED Viewed

@@ -79,32 +79,6 @@ export const WorkSpaceView: React.FC<WorkSpaceViewProps> = ({
     [setSelectedTab],
   );
-  // Compute tab panels anytime the tabs change
-  const tabPanels = useMemo(() => {
-    return Object.keys(tabs).map((key) => {
-      const tab = tabs[key];
-      return (
-        <TabPanel
-          id={tab.id}
-          title={tab.label}
-          onSelected={onSelected}
-          selected={selectedTab === tab.id}
-          scrollable={!!tab.scrollable}
-          scrollRef={tab.scrollRef}
-          scrollPosition={workspaceTabScrollPositionRef.current?.[tab.id]}
-          setScrollPosition={useCallback(
-            (position: number) => {
-              onScroll(tab.id, position);
-            },
-            [onScroll],
-          )}
-        >
-          {tab.content()}
-        </TabPanel>
-      );
-    });
-  }, [tabs, selectedTab]);
   if (evalSpec === undefined) {
     return <EmptyPanel />;
   } else {
@@ -150,7 +124,31 @@ export const WorkSpaceView: React.FC<WorkSpaceViewProps> = ({
               tabControlsClassName={clsx(styles.tabs, "text-size-smaller")}
               tabPanelsClassName={clsx(styles.tabPanels)}
             >
-              {tabPanels}
+              {Object.keys(tabs).map((key) => {
+                const tab = tabs[key];
+                return (
+                  <TabPanel
+                    key={tab.id}
+                    id={tab.id}
+                    title={tab.label}
+                    onSelected={onSelected}
+                    selected={selectedTab === tab.id}
+                    scrollable={!!tab.scrollable}
+                    scrollRef={tab.scrollRef}
+                    scrollPosition={
+                      workspaceTabScrollPositionRef.current?.[tab.id]
+                    }
+                    setScrollPosition={useCallback(
+                      (position: number) => {
+                        onScroll(tab.id, position);
+                      },
+                      [onScroll],
+                    )}
+                  >
+                    {tab.content()}
+                  </TabPanel>
+                );
+              })}
             </TabSet>
           </div>
         </div>

inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx CHANGED Viewed

@@ -30,16 +30,6 @@ export const PrimaryBar: React.FC<PrimaryBarProps> = ({
   evalSpec,
   setOffcanvas,
 }) => {
-  let statusPanel;
-  if (status === "success") {
-    statusPanel = <ResultsPanel results={evalResults} />;
-  } else if (status === "cancelled") {
-    statusPanel = <CancelledPanel sampleCount={samples?.length || 0} />;
-  } else if (status === "started") {
-    statusPanel = <RunningPanel sampleCount={samples?.length || 0} />;
-  } else if (status === "error") {
-    statusPanel = <ErroredPanel sampleCount={samples?.length || 0} />;
-  }
   const logFileName = file ? filename(file) : "";
   const handleToggle = useCallback(() => {
@@ -103,7 +93,18 @@ export const PrimaryBar: React.FC<PrimaryBarProps> = ({
         </div>
       </div>
       <div className={clsx(styles.taskStatus, "navbar-text")}>
-        {statusPanel}
+        {status === "success" ? (
+          <ResultsPanel results={evalResults} />
+        ) : undefined}
+        {status === "cancelled" ? (
+          <CancelledPanel sampleCount={samples?.length || 0} />
+        ) : undefined}
+        {status === "started" ? (
+          <RunningPanel sampleCount={samples?.length || 0} />
+        ) : undefined}
+        {status === "error" ? (
+          <ErroredPanel sampleCount={samples?.length || 0} />
+        ) : undefined}
       </div>
       <div id="task-created" style={{ display: "none" }}>
         {evalSpec?.created}

inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css CHANGED Viewed

@@ -14,13 +14,13 @@
   flex-direction: row;
   flex-wrap: wrap;
   justify-content: end;
-  height: 100%;
   align-items: center;
   margin-top: 0.2rem;
   padding-bottom: 0.4rem;
   row-gap: 1em;
   max-height: 15em;
   overflow: scroll;
+  align-items: baseline;
 }
 .verticalMetricReducer {
@@ -39,14 +39,26 @@
 }
 .verticalMetricValue {
-  font-size: var(--inspect-font-size-larger);
   font-weight: 500;
   text-align: center;
 }
+.multiScorer {
+  padding-left: 0;
+  height: 100%;
+  display: flex;
+  flex-direction: column;
+  padding: 0.5em 1em;
+}
+.multiScorerIndent {
+  padding-left: 1.5em;
+}
 .multiScorerReducer {
   text-align: center;
   margin-bottom: -0.3rem;
+  margin-top: 0.2em;
 }
 .multiScorerLabel {
@@ -58,10 +70,21 @@
 .multiScorerValue {
   display: grid;
   grid-template-columns: auto auto;
+  grid-auto-rows: auto;
   grid-column-gap: 0.3rem;
   grid-row-gap: 0;
+  padding-top: 0.3em;
 }
 .multiScorerValueContent {
   font-weight: 600;
+  text-align: center;
+}
+.multiScoreMetricGrid {
+  display: grid;
+  grid-template-rows: auto auto;
+  column-gap: 1em;
+  padding: 0 0.2em;
+  justify-content: center;
 }

inspect-ai 0.3.63__py3-none-any.whl → 0.3.65__py3-none-any.whl

inspect-ai 0.3.63__py3-none-any.whl → 0.3.65__py3-none-any.whl