inspect-ai 0.3.63__py3-none-any.whl → 0.3.65__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/cache.py +8 -7
- inspect_ai/_cli/common.py +0 -12
- inspect_ai/_cli/eval.py +32 -4
- inspect_ai/_cli/info.py +1 -0
- inspect_ai/_cli/list.py +1 -1
- inspect_ai/_cli/log.py +2 -0
- inspect_ai/_cli/sandbox.py +4 -1
- inspect_ai/_cli/score.py +181 -32
- inspect_ai/_cli/trace.py +2 -0
- inspect_ai/_cli/view.py +4 -2
- inspect_ai/_display/core/config.py +7 -1
- inspect_ai/_display/core/progress.py +1 -1
- inspect_ai/_display/textual/app.py +8 -4
- inspect_ai/_display/textual/widgets/samples.py +6 -5
- inspect_ai/_display/textual/widgets/sandbox.py +6 -0
- inspect_ai/_eval/__init__.py +0 -0
- inspect_ai/_eval/eval.py +100 -97
- inspect_ai/_eval/evalset.py +69 -69
- inspect_ai/_eval/loader.py +122 -12
- inspect_ai/_eval/registry.py +1 -1
- inspect_ai/_eval/run.py +14 -0
- inspect_ai/_eval/score.py +125 -36
- inspect_ai/_eval/task/log.py +105 -4
- inspect_ai/_eval/task/results.py +92 -38
- inspect_ai/_eval/task/run.py +6 -2
- inspect_ai/_eval/task/sandbox.py +35 -2
- inspect_ai/_eval/task/task.py +49 -46
- inspect_ai/_util/__init__.py +0 -0
- inspect_ai/_util/constants.py +1 -1
- inspect_ai/_util/content.py +8 -0
- inspect_ai/_util/error.py +2 -0
- inspect_ai/_util/file.py +15 -1
- inspect_ai/_util/logger.py +4 -2
- inspect_ai/_util/registry.py +7 -1
- inspect_ai/_view/view.py +1 -2
- inspect_ai/_view/www/App.css +8 -3
- inspect_ai/_view/www/README.md +1 -1
- inspect_ai/_view/www/dist/assets/index.css +66 -38
- inspect_ai/_view/www/dist/assets/index.js +525 -523
- inspect_ai/_view/www/log-schema.json +86 -73
- inspect_ai/_view/www/package.json +1 -1
- inspect_ai/_view/www/src/App.tsx +1 -0
- inspect_ai/_view/www/src/components/AnsiDisplay.tsx +1 -1
- inspect_ai/_view/www/src/components/JsonPanel.tsx +1 -1
- inspect_ai/_view/www/src/components/LargeModal.tsx +39 -49
- inspect_ai/_view/www/src/components/NavPills.tsx +3 -1
- inspect_ai/_view/www/src/components/TabSet.tsx +19 -4
- inspect_ai/_view/www/src/logfile/remoteLogFile.ts +0 -1
- inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +1 -1
- inspect_ai/_view/www/src/metadata/MetaDataView.tsx +1 -1
- inspect_ai/_view/www/src/metadata/RenderedContent.tsx +6 -13
- inspect_ai/_view/www/src/plan/PlanDetailView.tsx +17 -2
- inspect_ai/_view/www/src/plan/SolverDetailView.tsx +1 -1
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +14 -5
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +4 -2
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +16 -24
- inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +1 -1
- inspect_ai/_view/www/src/samples/chat/ChatView.tsx +1 -0
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +27 -13
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +19 -17
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +12 -10
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +56 -66
- inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +12 -5
- inspect_ai/_view/www/src/samples/chat/tools/tool.ts +21 -36
- inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +3 -1
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +27 -25
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +5 -1
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.module.css +13 -13
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +1 -1
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +2 -2
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +9 -5
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +1 -1
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +5 -4
- inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +1 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +1 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +17 -6
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +14 -19
- inspect_ai/_view/www/src/types/log.d.ts +107 -19
- inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +7 -1
- inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +5 -3
- inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +25 -27
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +12 -11
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +25 -2
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +60 -36
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +4 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +6 -4
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +16 -14
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +9 -19
- inspect_ai/_view/www/src/workspace/utils.ts +34 -0
- inspect_ai/approval/_approval.py +2 -0
- inspect_ai/approval/_approver.py +4 -4
- inspect_ai/approval/_auto.py +1 -1
- inspect_ai/approval/_human/approver.py +3 -0
- inspect_ai/approval/_policy.py +5 -0
- inspect_ai/approval/_registry.py +2 -2
- inspect_ai/dataset/_dataset.py +36 -45
- inspect_ai/dataset/_sources/__init__.py +0 -0
- inspect_ai/dataset/_sources/csv.py +13 -13
- inspect_ai/dataset/_sources/hf.py +29 -29
- inspect_ai/dataset/_sources/json.py +10 -10
- inspect_ai/log/__init__.py +2 -0
- inspect_ai/log/_convert.py +3 -3
- inspect_ai/log/_file.py +24 -9
- inspect_ai/log/_log.py +98 -7
- inspect_ai/log/_message.py +3 -1
- inspect_ai/log/_recorders/file.py +4 -0
- inspect_ai/log/_recorders/recorder.py +3 -0
- inspect_ai/log/_transcript.py +19 -8
- inspect_ai/model/__init__.py +2 -0
- inspect_ai/model/_cache.py +39 -21
- inspect_ai/model/_call_tools.py +2 -2
- inspect_ai/model/_chat_message.py +14 -4
- inspect_ai/model/_generate_config.py +1 -1
- inspect_ai/model/_model.py +31 -24
- inspect_ai/model/_model_output.py +14 -1
- inspect_ai/model/_openai.py +10 -18
- inspect_ai/model/_providers/google.py +9 -5
- inspect_ai/model/_providers/openai.py +5 -9
- inspect_ai/model/_providers/openrouter.py +1 -1
- inspect_ai/scorer/__init__.py +6 -1
- inspect_ai/scorer/_answer.py +1 -1
- inspect_ai/scorer/_classification.py +4 -0
- inspect_ai/scorer/_match.py +4 -5
- inspect_ai/scorer/_metric.py +87 -28
- inspect_ai/scorer/_metrics/__init__.py +3 -3
- inspect_ai/scorer/_metrics/accuracy.py +8 -10
- inspect_ai/scorer/_metrics/mean.py +3 -17
- inspect_ai/scorer/_metrics/std.py +111 -30
- inspect_ai/scorer/_model.py +12 -12
- inspect_ai/scorer/_pattern.py +3 -3
- inspect_ai/scorer/_reducer/reducer.py +36 -21
- inspect_ai/scorer/_reducer/registry.py +2 -2
- inspect_ai/scorer/_reducer/types.py +7 -1
- inspect_ai/scorer/_score.py +11 -1
- inspect_ai/scorer/_scorer.py +110 -16
- inspect_ai/solver/__init__.py +1 -1
- inspect_ai/solver/_basic_agent.py +19 -22
- inspect_ai/solver/_bridge/__init__.py +0 -3
- inspect_ai/solver/_bridge/bridge.py +3 -3
- inspect_ai/solver/_chain.py +1 -2
- inspect_ai/solver/_critique.py +3 -3
- inspect_ai/solver/_fork.py +2 -2
- inspect_ai/solver/_human_agent/__init__.py +0 -0
- inspect_ai/solver/_human_agent/agent.py +5 -8
- inspect_ai/solver/_human_agent/commands/clock.py +14 -10
- inspect_ai/solver/_human_agent/commands/note.py +1 -1
- inspect_ai/solver/_human_agent/commands/score.py +0 -11
- inspect_ai/solver/_multiple_choice.py +15 -18
- inspect_ai/solver/_prompt.py +7 -7
- inspect_ai/solver/_solver.py +53 -52
- inspect_ai/solver/_task_state.py +80 -69
- inspect_ai/solver/_use_tools.py +9 -9
- inspect_ai/tool/__init__.py +2 -1
- inspect_ai/tool/_tool.py +43 -14
- inspect_ai/tool/_tool_call.py +6 -2
- inspect_ai/tool/_tool_choice.py +3 -1
- inspect_ai/tool/_tool_def.py +10 -8
- inspect_ai/tool/_tool_params.py +24 -0
- inspect_ai/tool/_tool_with.py +7 -7
- inspect_ai/tool/_tools/__init__.py +0 -0
- inspect_ai/tool/_tools/_computer/_common.py +2 -2
- inspect_ai/tool/_tools/_computer/_computer.py +11 -0
- inspect_ai/tool/_tools/_execute.py +15 -9
- inspect_ai/tool/_tools/_web_browser/_resources/README.md +2 -2
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +5 -3
- inspect_ai/tool/_tools/_web_search.py +7 -5
- inspect_ai/util/_concurrency.py +3 -3
- inspect_ai/util/_panel.py +2 -0
- inspect_ai/util/_resource.py +12 -12
- inspect_ai/util/_sandbox/docker/compose.py +23 -20
- inspect_ai/util/_sandbox/docker/config.py +2 -1
- inspect_ai/util/_sandbox/docker/docker.py +10 -1
- inspect_ai/util/_sandbox/docker/service.py +100 -0
- inspect_ai/util/_sandbox/environment.py +99 -96
- inspect_ai/util/_subprocess.py +5 -3
- inspect_ai/util/_subtask.py +15 -16
- {inspect_ai-0.3.63.dist-info → inspect_ai-0.3.65.dist-info}/LICENSE +1 -1
- {inspect_ai-0.3.63.dist-info → inspect_ai-0.3.65.dist-info}/METADATA +10 -6
- {inspect_ai-0.3.63.dist-info → inspect_ai-0.3.65.dist-info}/RECORD +182 -175
- {inspect_ai-0.3.63.dist-info → inspect_ai-0.3.65.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.63.dist-info → inspect_ai-0.3.65.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.63.dist-info → inspect_ai-0.3.65.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,7 @@
|
|
1
1
|
import clsx from "clsx";
|
2
2
|
import { EvalMetric, EvalResults, EvalScore, Reducer } from "../../types/log";
|
3
3
|
import { formatPrettyDecimal } from "../../utils/format";
|
4
|
+
import { metricDisplayName } from "../utils";
|
4
5
|
import styles from "./ResultsPanel.module.css";
|
5
6
|
|
6
7
|
interface ResultsPanelProps {
|
@@ -23,7 +24,7 @@ export const ResultsPanel: React.FC<ResultsPanelProps> = ({ results }) => {
|
|
23
24
|
metric: {
|
24
25
|
name: key,
|
25
26
|
value: score.metrics[key].value,
|
26
|
-
|
27
|
+
params: score.metrics[key].params,
|
27
28
|
metadata: {},
|
28
29
|
},
|
29
30
|
};
|
@@ -31,18 +32,35 @@ export const ResultsPanel: React.FC<ResultsPanelProps> = ({ results }) => {
|
|
31
32
|
});
|
32
33
|
|
33
34
|
const metrics = Object.values(scorers)[0];
|
35
|
+
const showReducer = metrics && metrics.length > 0 && !!metrics[0].reducer;
|
34
36
|
return (
|
35
37
|
<div className={styles.simpleMetricsRows}>
|
36
38
|
{metrics.map((metric, i) => {
|
37
|
-
return
|
39
|
+
return (
|
40
|
+
<VerticalMetric
|
41
|
+
key={`simple-metric-${i}`}
|
42
|
+
metricSummary={metric}
|
43
|
+
isFirst={i === 0}
|
44
|
+
showReducer={showReducer}
|
45
|
+
/>
|
46
|
+
);
|
38
47
|
})}
|
39
48
|
</div>
|
40
49
|
);
|
41
50
|
} else {
|
51
|
+
const showReducer =
|
52
|
+
results?.scores.findIndex((score) => !!score.reducer) !== -1;
|
42
53
|
return (
|
43
54
|
<div className={styles.multiMetricsRows}>
|
44
55
|
{results?.scores?.map((score, index) => {
|
45
|
-
return
|
56
|
+
return (
|
57
|
+
<MultiScorerMetric
|
58
|
+
key={`multi-metric-${index}`}
|
59
|
+
scorer={score}
|
60
|
+
isFirst={index === 0}
|
61
|
+
showReducer={showReducer}
|
62
|
+
/>
|
63
|
+
);
|
46
64
|
})}
|
47
65
|
</div>
|
48
66
|
);
|
@@ -52,6 +70,7 @@ export const ResultsPanel: React.FC<ResultsPanelProps> = ({ results }) => {
|
|
52
70
|
interface VerticalMetricProps {
|
53
71
|
metricSummary: MetricSummary;
|
54
72
|
isFirst: boolean;
|
73
|
+
showReducer: boolean;
|
55
74
|
}
|
56
75
|
|
57
76
|
/** Renders a Vertical Metric
|
@@ -59,21 +78,8 @@ interface VerticalMetricProps {
|
|
59
78
|
const VerticalMetric: React.FC<VerticalMetricProps> = ({
|
60
79
|
metricSummary,
|
61
80
|
isFirst,
|
81
|
+
showReducer,
|
62
82
|
}) => {
|
63
|
-
const reducer_component = metricSummary.reducer ? (
|
64
|
-
<div
|
65
|
-
className={clsx(
|
66
|
-
"text-style-label",
|
67
|
-
"text-style-secondary",
|
68
|
-
styles.verticalMetricReducer,
|
69
|
-
)}
|
70
|
-
>
|
71
|
-
{metricSummary.reducer}
|
72
|
-
</div>
|
73
|
-
) : (
|
74
|
-
""
|
75
|
-
);
|
76
|
-
|
77
83
|
return (
|
78
84
|
<div style={{ paddingLeft: isFirst ? "0" : "1em" }}>
|
79
85
|
<div
|
@@ -84,11 +90,26 @@ const VerticalMetric: React.FC<VerticalMetricProps> = ({
|
|
84
90
|
styles.verticalMetricName,
|
85
91
|
)}
|
86
92
|
>
|
87
|
-
{metricSummary.metric
|
93
|
+
{metricDisplayName(metricSummary.metric)}
|
88
94
|
</div>
|
89
|
-
{
|
95
|
+
{showReducer ? (
|
96
|
+
<div
|
97
|
+
className={clsx(
|
98
|
+
"text-style-label",
|
99
|
+
"text-style-secondary",
|
100
|
+
styles.verticalMetricReducer,
|
101
|
+
)}
|
102
|
+
>
|
103
|
+
{metricSummary.reducer || "default"}
|
104
|
+
</div>
|
105
|
+
) : undefined}
|
106
|
+
|
90
107
|
<div
|
91
|
-
className={clsx(
|
108
|
+
className={clsx(
|
109
|
+
"vertical-metric-value",
|
110
|
+
"text-size-largest",
|
111
|
+
styles.verticalMetricValue,
|
112
|
+
)}
|
92
113
|
>
|
93
114
|
{formatPrettyDecimal(metricSummary.metric.value)}
|
94
115
|
</div>
|
@@ -99,33 +120,25 @@ const VerticalMetric: React.FC<VerticalMetricProps> = ({
|
|
99
120
|
interface MultiScorerMetricProps {
|
100
121
|
scorer: EvalScore;
|
101
122
|
isFirst: boolean;
|
123
|
+
showReducer: boolean;
|
102
124
|
}
|
103
125
|
|
104
126
|
const MultiScorerMetric: React.FC<MultiScorerMetricProps> = ({
|
105
127
|
scorer,
|
106
128
|
isFirst,
|
129
|
+
showReducer,
|
107
130
|
}) => {
|
108
131
|
const titleFontClz = "text-size-base";
|
109
132
|
const reducerFontClz = "text-size-smaller";
|
110
133
|
const valueFontClz = "text-size-base";
|
111
134
|
|
112
|
-
|
135
|
+
return (
|
113
136
|
<div
|
114
137
|
className={clsx(
|
115
|
-
|
116
|
-
|
117
|
-
"text-style-secondary",
|
118
|
-
styles.multiScorerReducer,
|
138
|
+
styles.multiScorer,
|
139
|
+
isFirst ? styles.multiScorerIndent : undefined,
|
119
140
|
)}
|
120
141
|
>
|
121
|
-
{scorer.reducer}
|
122
|
-
</div>
|
123
|
-
) : (
|
124
|
-
""
|
125
|
-
);
|
126
|
-
|
127
|
-
return (
|
128
|
-
<div style={{ paddingLeft: isFirst ? "0" : "1.5em" }}>
|
129
142
|
<div
|
130
143
|
className={clsx(
|
131
144
|
titleFontClz,
|
@@ -137,13 +150,24 @@ const MultiScorerMetric: React.FC<MultiScorerMetricProps> = ({
|
|
137
150
|
>
|
138
151
|
{scorer.name}
|
139
152
|
</div>
|
140
|
-
{
|
153
|
+
{showReducer ? (
|
154
|
+
<div
|
155
|
+
className={clsx(
|
156
|
+
reducerFontClz,
|
157
|
+
"text-style-label",
|
158
|
+
"text-style-secondary",
|
159
|
+
styles.multiScorerReducer,
|
160
|
+
)}
|
161
|
+
>
|
162
|
+
{scorer.reducer || "default"}
|
163
|
+
</div>
|
164
|
+
) : undefined}
|
141
165
|
<div className={clsx(valueFontClz, styles.multiScorerValue)}>
|
142
166
|
{Object.keys(scorer.metrics).map((key) => {
|
143
167
|
const metric = scorer.metrics[key];
|
144
168
|
return (
|
145
|
-
<div>
|
146
|
-
<div>{metric
|
169
|
+
<div className={styles.multiScoreMetricGrid} key={key}>
|
170
|
+
<div>{metricDisplayName(metric)}</div>
|
147
171
|
<div className={styles.multiScorerValueContent}>
|
148
172
|
{formatPrettyDecimal(metric.value)}
|
149
173
|
</div>
|
@@ -53,6 +53,7 @@ export const SecondaryBar: React.FC<SecondaryBarProps> = ({
|
|
53
53
|
size: "minmax(12%, auto)",
|
54
54
|
value: (
|
55
55
|
<LabeledValue
|
56
|
+
key="sb-dataset"
|
56
57
|
label="Dataset"
|
57
58
|
className={(styles.staticCol, "text-size-small")}
|
58
59
|
>
|
@@ -71,6 +72,7 @@ export const SecondaryBar: React.FC<SecondaryBarProps> = ({
|
|
71
72
|
size: "minmax(12%, auto)",
|
72
73
|
value: (
|
73
74
|
<LabeledValue
|
75
|
+
key="sb-scorer"
|
74
76
|
label={label}
|
75
77
|
className={clsx(
|
76
78
|
styles.staticCol,
|
@@ -88,6 +90,7 @@ export const SecondaryBar: React.FC<SecondaryBarProps> = ({
|
|
88
90
|
size: "minmax(12%, auto)",
|
89
91
|
value: (
|
90
92
|
<LabeledValue
|
93
|
+
key="sb-params"
|
91
94
|
label="Config"
|
92
95
|
className={clsx(styles.justifyRight, "text-size-small")}
|
93
96
|
>
|
@@ -106,6 +109,7 @@ export const SecondaryBar: React.FC<SecondaryBarProps> = ({
|
|
106
109
|
size: "minmax(12%, auto)",
|
107
110
|
value: (
|
108
111
|
<LabeledValue
|
112
|
+
key="sb-duration"
|
109
113
|
label="Duration"
|
110
114
|
className={clsx(styles.justifyRight, "text-size-small")}
|
111
115
|
>
|
@@ -2,17 +2,19 @@ import clsx from "clsx";
|
|
2
2
|
import { EvalScore } from "../../types/log";
|
3
3
|
import { formatPrettyDecimal } from "../../utils/format";
|
4
4
|
|
5
|
+
import { metricDisplayName } from "../utils";
|
5
6
|
import styles from "./SidebarScoreView.module.css";
|
6
7
|
interface SidebarScoreProps {
|
7
8
|
scorer: EvalScore;
|
8
9
|
}
|
9
10
|
|
10
11
|
export const SidebarScoreView: React.FC<SidebarScoreProps> = ({ scorer }) => {
|
12
|
+
const showReducer = !!scorer.reducer;
|
11
13
|
return (
|
12
14
|
<div className={styles.container}>
|
13
15
|
{Object.keys(scorer.metrics).map((metric) => {
|
14
16
|
return (
|
15
|
-
<div className={styles.metric}>
|
17
|
+
<div className={styles.metric} key={metric}>
|
16
18
|
<div
|
17
19
|
className={clsx(
|
18
20
|
"text-style-secondary",
|
@@ -21,11 +23,11 @@ export const SidebarScoreView: React.FC<SidebarScoreProps> = ({ scorer }) => {
|
|
21
23
|
styles.metricName,
|
22
24
|
)}
|
23
25
|
>
|
24
|
-
{scorer.metrics[metric]
|
26
|
+
{metricDisplayName(scorer.metrics[metric])}
|
25
27
|
</div>
|
26
|
-
{
|
28
|
+
{showReducer ? (
|
27
29
|
<div className={clsx("text-size-small", styles.metricReducer)}>
|
28
|
-
|
30
|
+
{scorer.reducer || "default"}
|
29
31
|
</div>
|
30
32
|
) : (
|
31
33
|
""
|
@@ -2,6 +2,7 @@ import clsx from "clsx";
|
|
2
2
|
import { Fragment } from "react";
|
3
3
|
import { Scores } from "../../types/log";
|
4
4
|
import { formatPrettyDecimal } from "../../utils/format";
|
5
|
+
import { metricDisplayName } from "../utils";
|
5
6
|
import styles from "./SidebarScoresView.module.css";
|
6
7
|
|
7
8
|
interface SidebarScoresProps {
|
@@ -9,26 +10,34 @@ interface SidebarScoresProps {
|
|
9
10
|
}
|
10
11
|
|
11
12
|
export const SidebarScoresView: React.FC<SidebarScoresProps> = ({ scores }) => {
|
13
|
+
const showReducer = scores.findIndex((score) => !!score.reducer) !== -1;
|
12
14
|
return (
|
13
15
|
<div className={styles.container}>
|
14
|
-
{scores.map((score) => {
|
16
|
+
{scores.map((score, idx) => {
|
15
17
|
const name = score.name;
|
16
18
|
const reducer = score.reducer;
|
17
19
|
return (
|
18
|
-
<div className={styles.scoreWrapper}>
|
20
|
+
<div className={styles.scoreWrapper} key={`scorer-${name}-${idx}`}>
|
19
21
|
<div
|
20
22
|
className={clsx(
|
21
23
|
"text-style-secondary",
|
22
|
-
"text-label",
|
24
|
+
"text-style-label",
|
23
25
|
"text-size-small",
|
24
26
|
styles.metricName,
|
25
27
|
)}
|
26
28
|
>
|
27
29
|
{name}
|
28
30
|
</div>
|
29
|
-
{
|
30
|
-
<div
|
31
|
-
{
|
31
|
+
{showReducer ? (
|
32
|
+
<div
|
33
|
+
className={clsx(
|
34
|
+
"text-size-small",
|
35
|
+
"text-style-label",
|
36
|
+
"text-style-secondary",
|
37
|
+
styles.metricReducer,
|
38
|
+
)}
|
39
|
+
>
|
40
|
+
{reducer || "default"}
|
32
41
|
</div>
|
33
42
|
) : (
|
34
43
|
""
|
@@ -38,14 +47,7 @@ export const SidebarScoresView: React.FC<SidebarScoresProps> = ({ scores }) => {
|
|
38
47
|
const metric = score.metrics[key];
|
39
48
|
return (
|
40
49
|
<Fragment key={key}>
|
41
|
-
<div
|
42
|
-
className={clsx(
|
43
|
-
"text-style-secondary",
|
44
|
-
"text-style-label",
|
45
|
-
)}
|
46
|
-
>
|
47
|
-
{metric.name}
|
48
|
-
</div>
|
50
|
+
<div className={clsx()}>{metricDisplayName(metric)}</div>
|
49
51
|
<div className={styles.metricValue}>
|
50
52
|
{formatPrettyDecimal(metric.value)}
|
51
53
|
</div>
|
@@ -36,24 +36,6 @@ export const InfoTab: React.FC<PlanTabProps> = ({
|
|
36
36
|
setHidden(false);
|
37
37
|
}, [evalSpec, evalPlan, evalResults, evalStats, samples]);
|
38
38
|
|
39
|
-
const infoCards = [];
|
40
|
-
infoCards.push([
|
41
|
-
<PlanCard
|
42
|
-
evalSpec={evalSpec}
|
43
|
-
evalPlan={evalPlan}
|
44
|
-
scores={evalResults?.scores}
|
45
|
-
/>,
|
46
|
-
]);
|
47
|
-
|
48
|
-
if (evalStatus !== "started") {
|
49
|
-
infoCards.push(<UsageCard stats={evalStats} />);
|
50
|
-
}
|
51
|
-
|
52
|
-
// If there is error or progress, includes those within info
|
53
|
-
if (evalStatus === "error" && evalError) {
|
54
|
-
infoCards.unshift(<TaskErrorCard error={evalError} />);
|
55
|
-
}
|
56
|
-
|
57
39
|
const showWarning =
|
58
40
|
(!samples || samples.length === 0) &&
|
59
41
|
evalStatus === "success" &&
|
@@ -73,7 +55,15 @@ export const InfoTab: React.FC<PlanTabProps> = ({
|
|
73
55
|
""
|
74
56
|
)}
|
75
57
|
<div style={{ padding: "0.5em 1em 0 1em", width: "100%" }}>
|
76
|
-
|
58
|
+
<PlanCard
|
59
|
+
evalSpec={evalSpec}
|
60
|
+
evalPlan={evalPlan}
|
61
|
+
scores={evalResults?.scores}
|
62
|
+
/>
|
63
|
+
{evalStatus !== "started" ? <UsageCard stats={evalStats} /> : undefined}
|
64
|
+
{evalStatus === "error" && evalError ? (
|
65
|
+
<TaskErrorCard error={evalError} />
|
66
|
+
) : undefined}
|
77
67
|
</div>
|
78
68
|
</div>
|
79
69
|
);
|
@@ -0,0 +1,34 @@
|
|
1
|
+
import { EvalMetric } from "../types/log";
|
2
|
+
|
3
|
+
export const metricDisplayName = (metric: EvalMetric): string => {
|
4
|
+
let modifier = undefined;
|
5
|
+
for (const metricModifier of metricModifiers) {
|
6
|
+
modifier = metricModifier(metric);
|
7
|
+
if (modifier) {
|
8
|
+
break;
|
9
|
+
}
|
10
|
+
}
|
11
|
+
const metricName = !modifier ? metric.name : `${metric.name}[${modifier}]`;
|
12
|
+
|
13
|
+
return metricName;
|
14
|
+
};
|
15
|
+
|
16
|
+
type MetricModifier = (metric: EvalMetric) => string | undefined;
|
17
|
+
|
18
|
+
const clusterMetricModifier: MetricModifier = (
|
19
|
+
metric: EvalMetric,
|
20
|
+
): string | undefined => {
|
21
|
+
if (metric.name !== "stderr") {
|
22
|
+
return undefined;
|
23
|
+
}
|
24
|
+
|
25
|
+
const clusterValue = ((metric.params || {}) as Record<string, unknown>)[
|
26
|
+
"cluster"
|
27
|
+
];
|
28
|
+
if (clusterValue === undefined || typeof clusterValue !== "string") {
|
29
|
+
return undefined;
|
30
|
+
}
|
31
|
+
return clusterValue;
|
32
|
+
};
|
33
|
+
|
34
|
+
const metricModifiers: MetricModifier[] = [clusterMetricModifier];
|
inspect_ai/approval/_approval.py
CHANGED
inspect_ai/approval/_approver.py
CHANGED
@@ -20,10 +20,10 @@ class Approver(Protocol):
|
|
20
20
|
Approve or reject a tool call.
|
21
21
|
|
22
22
|
Args:
|
23
|
-
message
|
24
|
-
call
|
25
|
-
view
|
26
|
-
state
|
23
|
+
message: Message generated by the model along with the tool call.
|
24
|
+
call: The tool call to be approved.
|
25
|
+
view: Custom rendering of tool context and call.
|
26
|
+
state: The current task state, if available.
|
27
27
|
|
28
28
|
Returns:
|
29
29
|
Approval: An Approval object containing the decision and explanation.
|
inspect_ai/approval/_auto.py
CHANGED
@@ -11,7 +11,7 @@ def auto_approver(decision: ApprovalDecision = "approve") -> Approver:
|
|
11
11
|
"""Automatically apply a decision to tool calls.
|
12
12
|
|
13
13
|
Args:
|
14
|
-
decision
|
14
|
+
decision: Decision to apply.
|
15
15
|
|
16
16
|
Returns:
|
17
17
|
Approver: Auto approver.
|
inspect_ai/approval/_policy.py
CHANGED
@@ -20,8 +20,13 @@ from ._call import call_approver, record_approval
|
|
20
20
|
|
21
21
|
@dataclass
|
22
22
|
class ApprovalPolicy:
|
23
|
+
"""Policy mapping approvers to tools."""
|
24
|
+
|
23
25
|
approver: Approver
|
26
|
+
"""Approver for policy."""
|
27
|
+
|
24
28
|
tools: str | list[str]
|
29
|
+
"""Tools to use this approver for (can be full tool names or globs)."""
|
25
30
|
|
26
31
|
|
27
32
|
def policy_approver(policies: str | list[ApprovalPolicy]) -> Approver:
|
inspect_ai/approval/_registry.py
CHANGED
@@ -31,11 +31,11 @@ def approver(*args: Any, name: str | None = None, **attribs: Any) -> Any:
|
|
31
31
|
Args:
|
32
32
|
*args: Function returning `Approver` targeted by
|
33
33
|
plain approver decorator without attributes (e.g. `@approver`)
|
34
|
-
name
|
34
|
+
name:
|
35
35
|
Optional name for approver. If the decorator has no name
|
36
36
|
argument then the name of the function
|
37
37
|
will be used to automatically assign a name.
|
38
|
-
**attribs:
|
38
|
+
**attribs: Additional approver attributes.
|
39
39
|
|
40
40
|
Returns:
|
41
41
|
Approver with registry attributes.
|
inspect_ai/dataset/_dataset.py
CHANGED
@@ -27,6 +27,8 @@ MT = TypeVar("MT", bound=BaseModel)
|
|
27
27
|
|
28
28
|
|
29
29
|
class Sample(BaseModel):
|
30
|
+
r"""Sample for an evaluation task."""
|
31
|
+
|
30
32
|
def __init__(
|
31
33
|
self,
|
32
34
|
input: str | list[ChatMessage],
|
@@ -38,22 +40,22 @@ class Sample(BaseModel):
|
|
38
40
|
files: dict[str, str] | None = None,
|
39
41
|
setup: str | None = None,
|
40
42
|
) -> None:
|
41
|
-
r"""
|
43
|
+
r"""Create a Sample.
|
42
44
|
|
43
45
|
Args:
|
44
|
-
input
|
45
|
-
choices
|
46
|
-
|
47
|
-
target
|
46
|
+
input: The input to be submitted to the model.
|
47
|
+
choices: Optional. List of available answer choices
|
48
|
+
(used only for multiple-choice evals).
|
49
|
+
target: Optional. Ideal target output. May be a literal value
|
48
50
|
or narrative text to be used by a model grader.
|
49
|
-
id
|
50
|
-
metadata
|
51
|
-
|
52
|
-
|
53
|
-
files
|
54
|
-
|
55
|
-
setup
|
56
|
-
|
51
|
+
id: Optional. Unique identifier for sample.
|
52
|
+
metadata: Optional. Arbitrary metadata associated with the sample.
|
53
|
+
sandbox (SandboxEnvironmentType | None): Sandbox environment type (or optionally a str or tuple with a shorthand spec)
|
54
|
+
sandbox: Optional. Sandbox specification for this sample.
|
55
|
+
files: Optional. Files that go along with the sample (copied to
|
56
|
+
SandboxEnvironment). Files can be paths, inline text, or inline binary (base64 encoded data URL).
|
57
|
+
setup: Optional. Setup script to run for sample (run
|
58
|
+
within default SandboxEnvironment).
|
57
59
|
"""
|
58
60
|
super().__init__(
|
59
61
|
input=input,
|
@@ -144,14 +146,6 @@ class Dataset(Sequence[Sample], abc.ABC):
|
|
144
146
|
@abc.abstractmethod
|
145
147
|
def shuffled(self) -> bool: ...
|
146
148
|
|
147
|
-
@abc.abstractmethod
|
148
|
-
def shuffle_choices(self, seed: int | None = None) -> None:
|
149
|
-
"""Shuffle the order of the choices with each sample.
|
150
|
-
|
151
|
-
Args:
|
152
|
-
seed: (int | None): Random seed for shuffling (optional).
|
153
|
-
"""
|
154
|
-
|
155
149
|
@overload
|
156
150
|
def __getitem__(self, index: int) -> Sample: ...
|
157
151
|
|
@@ -164,14 +158,6 @@ class Dataset(Sequence[Sample], abc.ABC):
|
|
164
158
|
@abc.abstractmethod
|
165
159
|
def __len__(self) -> int: ...
|
166
160
|
|
167
|
-
@abc.abstractmethod
|
168
|
-
def shuffle(self, seed: int | None = None) -> None:
|
169
|
-
"""Shuffle the order of the dataset (in place).
|
170
|
-
|
171
|
-
Args:
|
172
|
-
seed: (int | None): Random seed for shuffling (optional).
|
173
|
-
"""
|
174
|
-
|
175
161
|
@abc.abstractmethod
|
176
162
|
def sort(
|
177
163
|
self,
|
@@ -185,8 +171,8 @@ class Dataset(Sequence[Sample], abc.ABC):
|
|
185
171
|
The key function defaults to measuring the length of the sample's input field.
|
186
172
|
|
187
173
|
Args:
|
188
|
-
reverse
|
189
|
-
key
|
174
|
+
reverse: If `True`, sort in descending order. Defaults to False.
|
175
|
+
key: a callable mapping each item to a numeric value (optional, defaults to sample_input_len).
|
190
176
|
"""
|
191
177
|
|
192
178
|
@abc.abstractmethod
|
@@ -196,28 +182,33 @@ class Dataset(Sequence[Sample], abc.ABC):
|
|
196
182
|
"""Filter the dataset using a predicate.
|
197
183
|
|
198
184
|
Args:
|
199
|
-
predicate
|
200
|
-
name
|
185
|
+
predicate: Filtering function.
|
186
|
+
name: Name for filtered dataset (optional).
|
201
187
|
|
202
188
|
Returns:
|
203
189
|
Filtered dataset.
|
204
190
|
"""
|
205
191
|
|
192
|
+
@abc.abstractmethod
|
193
|
+
def shuffle(self, seed: int | None = None) -> None:
|
194
|
+
"""Shuffle the order of the dataset (in place).
|
195
|
+
|
196
|
+
Args:
|
197
|
+
seed: Random seed for shuffling (optional).
|
198
|
+
"""
|
199
|
+
|
200
|
+
@abc.abstractmethod
|
201
|
+
def shuffle_choices(self, seed: int | None = None) -> None:
|
202
|
+
"""Shuffle the order of the choices with each sample.
|
203
|
+
|
204
|
+
Args:
|
205
|
+
seed: Random seed for shuffling (optional).
|
206
|
+
"""
|
207
|
+
|
206
208
|
|
207
209
|
@dataclass
|
208
210
|
class FieldSpec:
|
209
|
-
r"""Specification for mapping data source fields to sample fields.
|
210
|
-
|
211
|
-
Args:
|
212
|
-
input (str): Name of the field containing the sample input.
|
213
|
-
target (str): Name of the field containing the sample target.
|
214
|
-
choices (str): Optional. Name of field containing the list of answer choices.
|
215
|
-
id (str): Optional. Unique identifier for the sample.
|
216
|
-
metadata (list[str] | None): List of additional field names that should be read as metadata.
|
217
|
-
sandbox (str): Optional. Sandbox type along with optional config file
|
218
|
-
files (str): Optional. Files that go along with the sample.
|
219
|
-
setup (str): Optional. Setup script to run for sample .
|
220
|
-
"""
|
211
|
+
r"""Specification for mapping data source fields to sample fields."""
|
221
212
|
|
222
213
|
input: str = field(default="input")
|
223
214
|
"""Name of the field containing the sample input."""
|
File without changes
|
@@ -35,30 +35,30 @@ def csv_dataset(
|
|
35
35
|
r"""Read dataset from CSV file.
|
36
36
|
|
37
37
|
Args:
|
38
|
-
csv_file
|
38
|
+
csv_file: Path to CSV file. Can be a local filesystem path,
|
39
39
|
a path to an S3 bucket (e.g. "s3://my-bucket"), or an HTTPS URL.
|
40
40
|
Use `fs_options` to pass arguments through to the `S3FileSystem` constructor.
|
41
|
-
sample_fields
|
41
|
+
sample_fields: Method of mapping underlying
|
42
42
|
fields in the data source to Sample objects. Pass `None` if the data is already
|
43
43
|
stored in `Sample` form (i.e. has "input" and "target" columns.); Pass a
|
44
44
|
`FieldSpec` to specify mapping fields by name; Pass a `RecordToSample` to
|
45
45
|
handle mapping with a custom function that returns one or more samples.
|
46
|
-
auto_id
|
47
|
-
shuffle
|
48
|
-
seed:
|
49
|
-
shuffle_choices:
|
50
|
-
limit
|
51
|
-
dialect
|
52
|
-
encoding
|
53
|
-
name
|
46
|
+
auto_id: Assign an auto-incrementing ID for each sample.
|
47
|
+
shuffle: Randomly shuffle the dataset order.
|
48
|
+
seed: Seed used for random shuffle.
|
49
|
+
shuffle_choices: Whether to shuffle the choices. If an int is passed, this will be used as the seed when shuffling.
|
50
|
+
limit: Limit the number of records to read.
|
51
|
+
dialect: CSV dialect ("unix", "excel" or "excel-tab"). Defaults to "unix". See https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters for more details
|
52
|
+
encoding: Text encoding for file (defaults to "utf-8").
|
53
|
+
name: Optional name for dataset (for logging). If not specified,
|
54
54
|
defaults to the stem of the filename
|
55
|
-
fs_options
|
55
|
+
fs_options: Optional. Additional arguments to pass through
|
56
56
|
to the filesystem provider (e.g. `S3FileSystem`). Use `{"anon": True }`
|
57
57
|
if you are accessing a public S3 bucket with no credentials.
|
58
|
-
fieldnames
|
58
|
+
fieldnames: Optional. A list of fieldnames to use for the CSV.
|
59
59
|
If None, the values in the first row of the file will be used as the fieldnames.
|
60
60
|
Useful for files without a header.
|
61
|
-
delimiter
|
61
|
+
delimiter: Optional. The delimiter to use when parsing the file. Defaults to ",".
|
62
62
|
|
63
63
|
Returns:
|
64
64
|
Dataset read from CSV file.
|