PyPI - inspect-ai - Versions diffs - 0.3.59__py3-none-any.whl → 0.3.60__py3-none-any.whl - Mend

inspect-ai 0.3.59__py3-none-any.whl → 0.3.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74)

inspect_ai/_cli/eval.py +0 -7
inspect_ai/_display/textual/widgets/samples.py +1 -1
inspect_ai/_eval/eval.py +10 -1
inspect_ai/_eval/loader.py +79 -19
inspect_ai/_eval/registry.py +6 -0
inspect_ai/_eval/score.py +2 -1
inspect_ai/_eval/task/results.py +6 -5
inspect_ai/_eval/task/run.py +11 -11
inspect_ai/_view/www/dist/assets/index.js +262 -303
inspect_ai/_view/www/src/App.mjs +6 -6
inspect_ai/_view/www/src/Types.mjs +1 -1
inspect_ai/_view/www/src/api/Types.ts +133 -0
inspect_ai/_view/www/src/api/{api-browser.mjs → api-browser.ts} +25 -13
inspect_ai/_view/www/src/api/api-http.ts +219 -0
inspect_ai/_view/www/src/api/api-shared.ts +47 -0
inspect_ai/_view/www/src/api/{api-vscode.mjs → api-vscode.ts} +22 -19
inspect_ai/_view/www/src/api/{client-api.mjs → client-api.ts} +93 -53
inspect_ai/_view/www/src/api/index.ts +51 -0
inspect_ai/_view/www/src/api/jsonrpc.ts +225 -0
inspect_ai/_view/www/src/components/DownloadButton.mjs +1 -1
inspect_ai/_view/www/src/index.js +2 -2
inspect_ai/_view/www/src/log/{remoteLogFile.mjs → remoteLogFile.ts} +62 -46
inspect_ai/_view/www/src/navbar/Navbar.mjs +1 -1
inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +1 -1
inspect_ai/_view/www/src/samples/SampleList.mjs +1 -1
inspect_ai/_view/www/src/samples/SampleScores.mjs +1 -1
inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +14 -14
inspect_ai/_view/www/src/samples/SamplesTab.mjs +10 -10
inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +2 -2
inspect_ai/_view/www/src/utils/{Json.mjs → json-worker.ts} +1 -3
inspect_ai/_view/www/src/utils/vscode.ts +36 -0
inspect_ai/_view/www/src/workspace/WorkSpace.mjs +1 -1
inspect_ai/approval/_human/manager.py +1 -1
inspect_ai/model/_call_tools.py +55 -0
inspect_ai/model/_conversation.py +1 -4
inspect_ai/model/_generate_config.py +2 -8
inspect_ai/model/_model_output.py +15 -0
inspect_ai/model/_openai.py +383 -0
inspect_ai/model/_providers/anthropic.py +52 -11
inspect_ai/model/_providers/azureai.py +1 -1
inspect_ai/model/_providers/goodfire.py +248 -0
inspect_ai/model/_providers/groq.py +7 -3
inspect_ai/model/_providers/hf.py +6 -0
inspect_ai/model/_providers/mistral.py +2 -1
inspect_ai/model/_providers/openai.py +36 -202
inspect_ai/model/_providers/openai_o1.py +2 -4
inspect_ai/model/_providers/providers.py +22 -0
inspect_ai/model/_providers/together.py +4 -4
inspect_ai/model/_providers/util/__init__.py +2 -3
inspect_ai/model/_providers/util/hf_handler.py +1 -1
inspect_ai/model/_providers/util/llama31.py +1 -1
inspect_ai/model/_providers/util/util.py +0 -76
inspect_ai/scorer/_metric.py +3 -0
inspect_ai/scorer/_scorer.py +2 -1
inspect_ai/solver/__init__.py +2 -0
inspect_ai/solver/_basic_agent.py +1 -1
inspect_ai/solver/_bridge/__init__.py +3 -0
inspect_ai/solver/_bridge/bridge.py +100 -0
inspect_ai/solver/_bridge/patch.py +170 -0
inspect_ai/solver/_solver.py +6 -0
inspect_ai/util/_display.py +5 -0
inspect_ai/util/_sandbox/docker/prereqs.py +1 -1
{inspect_ai-0.3.59.dist-info → inspect_ai-0.3.60.dist-info}/METADATA +3 -2
{inspect_ai-0.3.59.dist-info → inspect_ai-0.3.60.dist-info}/RECORD +68 -63
inspect_ai/_view/www/src/api/Types.mjs +0 -117
inspect_ai/_view/www/src/api/api-http.mjs +0 -300
inspect_ai/_view/www/src/api/api-shared.mjs +0 -10
inspect_ai/_view/www/src/api/index.mjs +0 -49
inspect_ai/_view/www/src/api/jsonrpc.mjs +0 -208
inspect_ai/_view/www/src/utils/vscode.mjs +0 -16
{inspect_ai-0.3.59.dist-info → inspect_ai-0.3.60.dist-info}/LICENSE +0 -0
{inspect_ai-0.3.59.dist-info → inspect_ai-0.3.60.dist-info}/WHEEL +0 -0
{inspect_ai-0.3.59.dist-info → inspect_ai-0.3.60.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.59.dist-info → inspect_ai-0.3.60.dist-info}/top_level.txt +0 -0

inspect_ai/_view/www/src/log/{remoteLogFile.mjs → remoteLogFile.ts} RENAMED Viewed

@@ -1,5 +1,12 @@
 //@ts-check
-import { asyncJsonParse } from "../utils/Json.mjs";
+import {
+  EvalHeader,
+  EvalSummary,
+  LogViewAPI,
+  SampleSummary,
+} from "../api/Types";
+import { EvalLog, EvalPlan, EvalSample, EvalSpec } from "../types/log";
+import { asyncJsonParse } from "../utils/json-worker";
 import { AsyncQueue } from "../utils/queue.mjs";
 import {
   FileSizeLimitError,
@@ -9,42 +16,46 @@ import {
 // don't try to load samples greater than 50mb
 const MAX_BYTES = 50 * 1024 * 1024;
-/**
- * @typedef {Object} SampleEntry
- * @property {string} sampleId
- * @property {number} epoch
- */
+interface SampleEntry {
+  sampleId: string;
+  epoch: number;
+}
-/**
- * @typedef {Object} RemoteLogFile
- * @property {() => Promise<Object>} readHeader - Reads the header of the log file.
- * @property {() => Promise<Object>} readLogSummary - Reads the log summary including header and sample summaries.
- * @property {(sampleId: string, epoch: number) => Promise<Object>} readSample - Reads a specific sample file.
- * @property {() => Promise<import("../types/log").EvalLog>} readCompleteLog - Reads the complete log file including all samples.
- */
+export interface RemoteLogFile {
+  readHeader: () => Promise<EvalHeader>;
+  readLogSummary: () => Promise<EvalSummary>;
+  readSample: (sampleId: string, epoch: number) => Promise<EvalSample>;
+  readCompleteLog: () => Promise<EvalLog>;
+}
+interface LogStart {
+  version: number;
+  eval: EvalSpec;
+  plan: EvalPlan;
+}
 /**
  * Opens a remote log file and provides methods to read its contents.
- * @param {import("../api/Types.mjs").LogViewAPI} api - The api
- * @param {string} url - The URL of the remote zip file.
- * @param {number} concurrency - The number of concurrent operations allowed.
- * @returns {Promise<RemoteLogFile>} An object with methods to read the log file.
  */
-export const openRemoteLogFile = async (api, url, concurrency) => {
+export const openRemoteLogFile = async (
+  api: LogViewAPI,
+  url: string,
+  concurrency: number,
+): Promise<RemoteLogFile> => {
   const queue = new AsyncQueue(concurrency);
   const remoteZipFile = await openRemoteZipFile(
-    `${encodeURIComponent(url)}`,
+    url,
     api.eval_log_size,
     api.eval_log_bytes,
   );
   /**
    * Reads and parses a JSON file from the zip.
-   * @param {string} file - The name of the file to read.
-   * @param {number} [maxBytes] - the max bytes
-   * @returns {Promise<Object>} The parsed JSON content.
    */
-  const readJSONFile = async (file, maxBytes) => {
+  const readJSONFile = async (
+    file: string,
+    maxBytes?: number,
+  ): Promise<Object> => {
     try {
       const data = await remoteZipFile.readFile(file, maxBytes);
       const textDecoder = new TextDecoder("utf-8");
@@ -53,19 +64,22 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
     } catch (error) {
       if (error instanceof FileSizeLimitError) {
         throw error;
-      } else {
+      } else if (error instanceof Error) {
         throw new Error(
           `Failed to read or parse file ${file}: ${error.message}`,
         );
+      } else {
+        throw new Error(
+          `Failed to read or parse file ${file} - an unknown error occurred`,
+        );
       }
     }
   };
   /**
    * Lists all samples in the zip file.
-   * @returns {Promise<SampleEntry[]>} An array of sample objects.
    */
-  const listSamples = async () => {
+  const listSamples = async (): Promise<SampleEntry[]> => {
     return Array.from(remoteZipFile.centralDirectory.keys())
       .filter(
         (filename) =>
@@ -82,14 +96,14 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
   /**
    * Reads a specific sample file.
-   * @param {string} sampleId - The ID of the sample.
-   * @param {number} epoch - The epoch of the sample.
-   * @returns {Promise<Object>} The content of the sample file.
    */
-  const readSample = async (sampleId, epoch) => {
+  const readSample = async (
+    sampleId: string,
+    epoch: number,
+  ): Promise<EvalSample> => {
     const sampleFile = `samples/${sampleId}_epoch_${epoch}.json`;
     if (remoteZipFile.centralDirectory.has(sampleFile)) {
-      return readJSONFile(sampleFile, MAX_BYTES);
+      return (await readJSONFile(sampleFile, MAX_BYTES)) as EvalSample;
     } else {
       console.log({ dir: remoteZipFile.centralDirectory });
       throw new Error(
@@ -100,13 +114,12 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
   /**
    * Reads the results.json file.
-   * @returns {Promise<Object>} The content of results.json.
    */
-  const readHeader = async () => {
+  const readHeader = async (): Promise<EvalHeader> => {
     if (remoteZipFile.centralDirectory.has("header.json")) {
-      return readJSONFile("header.json");
+      return (await readJSONFile("header.json")) as EvalHeader;
     } else {
-      const evalSpec = await readJSONFile("_journal/start.json");
+      const evalSpec = (await readJSONFile("_journal/start.json")) as LogStart;
       return {
         status: "started",
         eval: evalSpec.eval,
@@ -117,9 +130,8 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
   /**
    * Reads individual summary files when summaries.json is not available.
-   * @returns {Promise<Object>} Combined summaries from individual files.
    */
-  const readFallbackSummaries = async () => {
+  const readFallbackSummaries = async (): Promise<SampleSummary[]> => {
     const summaryFiles = Array.from(
       remoteZipFile.centralDirectory.keys(),
     ).filter(
@@ -128,14 +140,16 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
         filename.endsWith(".json"),
     );
-    const summaries = [];
-    const errors = [];
+    const summaries: SampleSummary[] = [];
+    const errors: unknown[] = [];
     await Promise.all(
       summaryFiles.map((filename) =>
         queue.enqueue(async () => {
           try {
-            const partialSummary = await readJSONFile(filename);
+            const partialSummary = (await readJSONFile(
+              filename,
+            )) as SampleSummary[];
             summaries.push(...partialSummary);
           } catch (error) {
             errors.push(error);
@@ -156,11 +170,10 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
   /**
    * Reads all summaries, falling back to individual files if necessary.
-   * @returns {Promise<Object>} All summaries.
    */
-  const readSampleSummaries = async () => {
+  const readSampleSummaries = async (): Promise<SampleSummary[]> => {
     if (remoteZipFile.centralDirectory.has("summaries.json")) {
-      return await readJSONFile("summaries.json");
+      return (await readJSONFile("summaries.json")) as SampleSummary[];
     } else {
       return readFallbackSummaries();
     }
@@ -187,14 +200,17 @@ export const openRemoteLogFile = async (api, url, concurrency) => {
     readSample,
     /**
      * Reads the complete log file.
-     * @returns {Promise<import("../types/log").EvalLog>} The complete log data.
      */
-    readCompleteLog: async () => {
+    readCompleteLog: async (): Promise<EvalLog> => {
       const [evalLog, samples] = await Promise.all([
         readHeader(),
         listSamples().then((sampleIds) =>
           Promise.all(
-            sampleIds.map(({ sampleId, epoch }) => readSample(sampleId, epoch)),
+            sampleIds.map(({ sampleId, epoch }) =>
+              readSample(sampleId, epoch).then(
+                (sample) => sample as EvalSample,
+              ),
+            ),
           ),
         ),
       ]);

inspect_ai/_view/www/src/navbar/Navbar.mjs CHANGED Viewed

@@ -18,8 +18,8 @@ import { SecondaryBar } from "./SecondaryBar.mjs";
  * @param {import("../types/log").EvalResults} [props.evalResults] - The EvalResults
  * @param {import("../types/log").EvalPlan} [props.evalPlan] - The EvalSpec
  * @param {import("../types/log").EvalStats} [props.evalStats] - The EvalStats
- * @param {import("../api/Types.mjs").SampleSummary[]} [props.samples] - the samples
  * @param {import("../samples/SamplesDescriptor.mjs").EvalDescriptor} [props.evalDescriptor] - The EvalDescriptor
+ * @param {import("../api/Types.ts").SampleSummary[]} [props.samples] - the samples
  * @param {string} [props.status] - the status
  * @param {boolean} props.offcanvas - Are we in offcanvas mode?
  * @param {boolean} props.showToggle - Should we show the toggle?

inspect_ai/_view/www/src/navbar/SecondaryBar.mjs CHANGED Viewed

@@ -13,8 +13,8 @@ import { scoreFilterItems } from "../samples/tools/filters.mjs";
  * @param {import("../types/log").EvalPlan} [props.evalPlan] - The EvalSpec
  * @param {import("../types/log").EvalResults} [props.evalResults] - The EvalResults
  * @param {import("../types/log").EvalStats} [props.evalStats] - The EvalStats
- * @param {import("../api/Types.mjs").SampleSummary[]} [props.samples] - the samples
  * @param {import("../samples/SamplesDescriptor.mjs").EvalDescriptor} [props.evalDescriptor] - The EvalDescriptor
+ * @param {import("../api/Types.ts").SampleSummary[]} [props.samples] - the samples
  * @param {string} [props.status] - the status
  * @param {Map<string, string>} [props.style] - is this off canvas
  *

inspect_ai/_view/www/src/samples/SampleList.mjs CHANGED Viewed

@@ -252,7 +252,7 @@ const SeparatorRow = ({ id, title, height }) => {
  * @param {Object} props - The parameters for the component.
  * @param {string} props.id - The unique identifier for the sample.
  * @param {number} props.index - The index of the sample.
- * @param {import("../api/Types.mjs").SampleSummary} props.sample - The sample.
+ * @param {import("../api/Types.ts").SampleSummary} props.sample - The sample.
  * @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} props.sampleDescriptor - The sample descriptor.
  * @param {number} props.height - The height of the sample row.
  * @param {boolean} props.selected - Whether the sample is selected.

inspect_ai/_view/www/src/samples/SampleScores.mjs CHANGED Viewed

@@ -2,7 +2,7 @@ import { html } from "htm/preact";
 /**
  * @param {Object} props
- * @param {import("../api/Types.mjs").SampleSummary} props.sample
+ * @param {import("../api/Types.ts").SampleSummary} props.sample
  * @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} props.sampleDescriptor
  * @param {string} props.scorer
  * @returns {import("preact").JSX.Element}

inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs CHANGED Viewed

@@ -21,12 +21,12 @@ import {
  * Represents a utility summary of the samples that doesn't change with the selected score.
  * @typedef {Object} EvalDescriptor
  * @property {number} epochs - The number of epochs.
- * @property {import("../api/Types.mjs").SampleSummary[]} samples - The list of sample summaries.
+ * @property {import("../api/Types.ts").SampleSummary[]} samples - The list of sample summaries.
  * @property {import("../Types.mjs").ScoreLabel[]} scores - the list of available scores
- * @property {(sample: import("../api/Types.mjs").BasicSampleData, scoreLabel: import("../Types.mjs").ScoreLabel) => ScorerDescriptor} scorerDescriptor - Returns the scorer descriptor for a sample and a specified scorer.
+ * @property {(sample: import("../api/Types.ts").BasicSampleData, scoreLabel: import("../Types.mjs").ScoreLabel) => ScorerDescriptor} scorerDescriptor - Returns the scorer descriptor for a sample and a specified scorer.
  * @property {(scoreLabel: import("../Types.mjs").ScoreLabel) => ScoreDescriptor} scoreDescriptor - Provides information about the score types and how to render them.
- * @property {(sample: import("../api/Types.mjs").BasicSampleData, scoreLabel: import("../Types.mjs").ScoreLabel) => SelectedScore} score - Returns information about a score for a sample.
- * @property {(sample: import("../api/Types.mjs").BasicSampleData, scorer: string) => string} scoreAnswer - Returns the answer for a sample and a specified scorer.
+ * @property {(sample: import("../api/Types.ts").BasicSampleData, scoreLabel: import("../Types.mjs").ScoreLabel) => SelectedScore} score - Returns information about a score for a sample.
+ * @property {(sample: import("../api/Types.ts").BasicSampleData, scorer: string) => string} scoreAnswer - Returns the answer for a sample and a specified scorer.
  */
 /**
@@ -35,8 +35,8 @@ import {
  * @property {EvalDescriptor} evalDescriptor - The EvalDescriptor.
  * @property {MessageShape} messageShape - The normalized sizes of input, target, and answer messages.
  * @property {ScoreDescriptor} selectedScoreDescriptor - Provides information about the score types and how to render them.
- * @property {(sample: import("../api/Types.mjs").BasicSampleData) => SelectedScore} selectedScore - Returns the selected score for a sample.
- * @property {(sample: import("../api/Types.mjs").BasicSampleData) => ScorerDescriptor} selectedScorerDescriptor - Returns the scorer descriptor for a sample using the selected scorer.
+ * @property {(sample: import("../api/Types.ts").BasicSampleData) => SelectedScore} selectedScore - Returns the selected score for a sample.
+ * @property {(sample: import("../api/Types.ts").BasicSampleData) => ScorerDescriptor} selectedScorerDescriptor - Returns the scorer descriptor for a sample using the selected scorer.
  */
 /**
@@ -108,7 +108,7 @@ export const parseScoreLabelKey = (key) => {
 /**
  * @param {import("../Types.mjs").ScoreLabel[]} scores - the list of available scores
- * @param {import("../api/Types.mjs").SampleSummary[]} samples - the list of sample summaries
+ * @param {import("../api/Types.ts").SampleSummary[]} samples - the list of sample summaries
  * @param {number} epochs - The number of epochs
  * @returns {EvalDescriptor} The EvalDescriptor
  */
@@ -118,7 +118,7 @@ export const createEvalDescriptor = (scores, samples, epochs) => {
   }
   /**
-   * @param {import("../api/Types.mjs").BasicSampleData} sample - the currently selected score
+   * @param {import("../api/Types.ts").BasicSampleData} sample - the currently selected score
    * @param {import("../Types.mjs").ScoreLabel} scoreLabel - the score label
    * @returns {import("../types/log").Value2} The Score
    */
@@ -142,7 +142,7 @@ export const createEvalDescriptor = (scores, samples, epochs) => {
   };
   /**
-   * @param {import("../api/Types.mjs").BasicSampleData} sample - the currently selected score
+   * @param {import("../api/Types.ts").BasicSampleData} sample - the currently selected score
    * @param {string} scorer - the scorer name
    * @returns {string} The answer
    */
@@ -158,7 +158,7 @@ export const createEvalDescriptor = (scores, samples, epochs) => {
   };
   /**
-   * @param {import("../api/Types.mjs").BasicSampleData} sample - the currently selected score
+   * @param {import("../api/Types.ts").BasicSampleData} sample - the currently selected score
    * @param {string} scorer - the scorer name
    * @returns {string} The explanation
    */
@@ -174,7 +174,7 @@ export const createEvalDescriptor = (scores, samples, epochs) => {
   // Retrieve the metadata for a sample
   /**
-   * @param {import("../api/Types.mjs").BasicSampleData} sample - the currently selected score
+   * @param {import("../api/Types.ts").BasicSampleData} sample - the currently selected score
    * @param {string} scorer - the scorer name
    * @returns {Object} The explanation
    */
@@ -248,7 +248,7 @@ export const createEvalDescriptor = (scores, samples, epochs) => {
   };
   /**
-   * @param {import("../api/Types.mjs").BasicSampleData} sample
+   * @param {import("../api/Types.ts").BasicSampleData} sample
    * @param {import("../Types.mjs").ScoreLabel} scoreLabel
    * @returns {any}
    */
@@ -265,7 +265,7 @@ export const createEvalDescriptor = (scores, samples, epochs) => {
   };
   /**
-   * @param {import("../api/Types.mjs").BasicSampleData} sample
+   * @param {import("../api/Types.ts").BasicSampleData} sample
    * @param {import("../Types.mjs").ScoreLabel} scoreLabel
    * @returns {ScorerDescriptor}
    */
@@ -348,7 +348,7 @@ export const createEvalDescriptor = (scores, samples, epochs) => {
   };
   /**
-   * @param {import("../api/Types.mjs").BasicSampleData} sample
+   * @param {import("../api/Types.ts").BasicSampleData} sample
    * @param {import("../Types.mjs").ScoreLabel} scoreLabel
    * @returns {SelectedScore}
    */

inspect_ai/_view/www/src/samples/SamplesTab.mjs CHANGED Viewed

@@ -13,7 +13,7 @@ import { EmptyPanel } from "../components/EmptyPanel.mjs";
  * @param {Object} props - The parameters for the component.
  * @param {import("../types/log").Sample} [props.sample] - The sample
  * @param {string} [props.task_id] - The task id
- * @param {import("../api/Types.mjs").SampleSummary[]} [props.samples] - the samples
+ * @param {import("../api/Types.ts").SampleSummary[]} [props.samples] - the samples
  * @param {import("../Types.mjs").SampleMode} props.sampleMode - the mode for displaying samples
  * @param {"epoch" | "sample" | "none" } props.groupBy - how to group items
  * @param {"asc" | "desc" } props.groupByOrder - whether grouping is ascending or descending
@@ -213,19 +213,19 @@ export const SamplesTab = ({
  * @property {string} label - The label for the sample, formatted as "Sample {group} (Epoch {item})".
  * @property {number} number - The current counter item value.
  * @property {number} index - The index of the sample.
- * @property {import("../api/Types.mjs").SampleSummary | string} data - The items data payload.
+ * @property {import("../api/Types.ts").SampleSummary | string} data - The items data payload.
  * @property {string} type - The type of the result, in this case, "sample". (or "separator")
  */
 /**
  * Perform any grouping of the samples
  *
- * @param {import("../api/Types.mjs").SampleSummary[]} samples - the list of sample summaries
+ * @param {import("../api/Types.ts").SampleSummary[]} samples - the list of sample summaries
  * @param {"sample" | "epoch" | "none"} groupBy - how to group samples
  * @param {"asc" | "desc"} groupByOrder - how to order grouped samples
  * @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} sampleDescriptor - the sample descriptor
- * @returns {(sample: import("../api/Types.mjs").SampleSummary, index: number, previousSample: import("../api/Types.mjs").SampleSummary) => ListItem[]} The list items
+ * @returns {(sample: import("../api/Types.ts").SampleSummary, index: number, previousSample: import("../api/Types.ts").SampleSummary) => ListItem[]} The list items
  */
 const getSampleProcessor = (
   samples,
@@ -246,9 +246,9 @@ const getSampleProcessor = (
 /**
  * Performs no grouping
  *
- * @param {import("../api/Types.mjs").SampleSummary[]} samples - the list of sample summaries
+ * @param {import("../api/Types.ts").SampleSummary[]} samples - the list of sample summaries
  * @param {string} order - the selected order
- * @returns {(sample: import("../api/Types.mjs").SampleSummary, index: number, previousSample: import("../api/Types.mjs").SampleSummary) => ListItem[]} The list
+ * @returns {(sample: import("../api/Types.ts").SampleSummary, index: number, previousSample: import("../api/Types.ts").SampleSummary) => ListItem[]} The list
  */
 const noGrouping = (samples, order) => {
   const counter = getCounter(samples.length, 1, order);
@@ -270,10 +270,10 @@ const noGrouping = (samples, order) => {
 /**
  * Groups by sample (showing separators for Epochs)
  *
- * @param {import("../api/Types.mjs").SampleSummary[]} samples - the list of sample summaries
+ * @param {import("../api/Types.ts").SampleSummary[]} samples - the list of sample summaries
  * @param {string} order - the selected order
  * @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} sampleDescriptor - the sample descriptor
- * @returns {(sample: import("../api/Types.mjs").SampleSummary, index: number, previousSample: import("../api/Types.mjs").SampleSummary) => ListItem[]} The list
+ * @returns {(sample: import("../api/Types.ts").SampleSummary, index: number, previousSample: import("../api/Types.ts").SampleSummary) => ListItem[]} The list
  */
 const groupBySample = (samples, sampleDescriptor, order) => {
   // ensure that we are sorted by id
@@ -327,10 +327,10 @@ const groupBySample = (samples, sampleDescriptor, order) => {
 /**
  * Groups by epoch (showing a separator for each sample)
  *
- * @param {import("../api/Types.mjs").SampleSummary[]} samples - the list of sample summaries
+ * @param {import("../api/Types.ts").SampleSummary[]} samples - the list of sample summaries
  * @param {string} order - the selected order
  * @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} sampleDescriptor - the sample descriptor
- * @returns {(sample: import("../api/Types.mjs").SampleSummary, index: number, previousSample: import("../api/Types.mjs").SampleSummary) => ListItem[]} The list
+ * @returns {(sample: import("../api/Types.ts").SampleSummary, index: number, previousSample: import("../api/Types.ts").SampleSummary) => ListItem[]} The list
  */
 const groupByEpoch = (samples, sampleDescriptor, order) => {
   const groupCount = sampleDescriptor.evalDescriptor.epochs;

inspect_ai/_view/www/src/samples/tools/SortFilter.mjs CHANGED Viewed

@@ -89,9 +89,9 @@ const sortId = (a, b) => {
  * Sorts a list of samples
  *
  * @param {string} sort - The sort direction
- * @param {import("../../api/Types.mjs").SampleSummary[]} samples - The samples
+ * @param {import("../../api/Types.ts").SampleSummary[]} samples - The samples
  * @param {import("../SamplesDescriptor.mjs").SamplesDescriptor} samplesDescriptor - The samples descriptor
- * @returns {{ sorted: import("../../api/Types.mjs").SampleSummary[], order: 'asc' | 'desc' }} An object with sorted samples and the sort order.
+ * @returns {{ sorted: import("../../api/Types.ts").SampleSummary[], order: 'asc' | 'desc' }} An object with sorted samples and the sort order.
  */
 export const sortSamples = (sort, samples, samplesDescriptor) => {
   const sortedSamples = samples.sort((a, b) => {

inspect_ai/_view/www/src/utils/{Json.mjs → json-worker.ts} RENAMED Viewed

@@ -1,6 +1,4 @@
-// @ts-check
-export const asyncJsonParse = async (text) => {
+export const asyncJsonParse = async (text: string): Promise<any> => {
   const encoder = new TextEncoder();
   const encodedText = encoder.encode(text);
   const blob = new Blob([kWorkerCode], { type: "application/javascript" });

inspect_ai/_view/www/src/utils/vscode.ts ADDED Viewed

@@ -0,0 +1,36 @@
+/**
+ * Type definition for the VS Code API object
+ * Note: This is a minimal definition - expand based on your needs
+ */
+interface VSCodeApi {
+  postMessage(message: unknown): void;
+  getState(): unknown;
+  setState(state: unknown): void;
+}
+/**
+ * The cached instance of the VS Code API
+ */
+let vscodeApi: VSCodeApi | undefined;
+// Declare the acquireVsCodeApi function on the window object
+declare global {
+  interface Window {
+    acquireVsCodeApi?: () => VSCodeApi;
+  }
+}
+/**
+ * Gets or initializes the VS Code API instance
+ * @returns {VSCodeApi | undefined} The VS Code API instance if in VS Code environment, undefined otherwise
+ */
+export const getVscodeApi = (): VSCodeApi | undefined => {
+  if (window.acquireVsCodeApi) {
+    if (vscodeApi === undefined) {
+      vscodeApi = window.acquireVsCodeApi();
+    }
+    return vscodeApi;
+  } else {
+    return undefined;
+  }
+};

inspect_ai/_view/www/src/workspace/WorkSpace.mjs CHANGED Viewed

@@ -43,7 +43,7 @@ import { debounce } from "../utils/sync.mjs";
  * @param {import("../types/log").EvalStats} [props.evalStats] - The EvalStats for this eval
  * @param {import("../types/log").EvalResults} [props.evalResults] - The EvalResults for this eval
  * @param {import("../Types.mjs").CurrentLog} [props.log] - the current log
- * @param {import("../api/Types.mjs").SampleSummary[]} [props.samples] - the samples
+ * @param {import("../api/Types.ts").SampleSummary[]} [props.samples] - the samples
  * @param {import("../Types.mjs").SampleMode} props.sampleMode - the mode for displaying samples
  * @param {string} props.groupBy - what to group by
  * @param {string} props.groupByOrder - the grouping order

inspect_ai/approval/_human/manager.py CHANGED Viewed

@@ -40,7 +40,7 @@ class HumanApprovalManager:
         future = cast(Future[Approval], asyncio.get_event_loop().create_future())
         sample = sample_active()
         assert sample
-        assert sample.sample.id
+        assert sample.sample.id is not None
         pending = PendingApprovalRequest(
             request=request,
             task=sample.task,

inspect_ai/model/_call_tools.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import asyncio
 import inspect
+import json
 import types
 from dataclasses import is_dataclass
 from logging import getLogger
@@ -21,6 +22,7 @@ from typing import (
     is_typeddict,
 )
+import yaml
 from jsonschema import Draft7Validator
 from pydantic import BaseModel
@@ -469,3 +471,56 @@ def truncate_tool_output(
         )
     else:
         return None
+def tool_parse_error_message(arguments: str, ex: Exception) -> str:
+    return f"Error parsing the following tool call arguments:\n\n{arguments}\n\nError details: {ex}"
+def parse_tool_call(
+    id: str, function: str, arguments: str, tools: list[ToolInfo] | None = None
+) -> ToolCall:
+    error: str | None = None
+    arguments_dict: dict[str, Any] = {}
+    def report_parse_error(ex: Exception) -> None:
+        nonlocal error
+        error = tool_parse_error_message(arguments, ex)
+        logger.info(error)
+    # if the arguments is a dict, then handle it with a plain json.loads
+    arguments = arguments.strip()
+    if arguments.startswith("{"):
+        try:
+            arguments_dict = json.loads(arguments)
+        except json.JSONDecodeError as ex:
+            report_parse_error(ex)
+    # otherwise parse it as yaml (which will pickup unquoted strings, numbers, and true/false)
+    # and then create a dict that maps it to the first function argument
+    elif function and tools:
+        tool_info = next(
+            (
+                tool
+                for tool in tools
+                if tool.name == function and len(tool.parameters.properties) > 0
+            ),
+            None,
+        )
+        if tool_info:
+            param_names = list(tool_info.parameters.properties.keys())
+            try:
+                value = yaml.safe_load(arguments)
+                arguments_dict[param_names[0]] = value
+            except yaml.error.YAMLError:
+                # If the yaml parser fails, we treat it as a string argument.
+                arguments_dict[param_names[0]] = arguments
+    # return ToolCall with error payload
+    return ToolCall(
+        id=id,
+        function=function,
+        arguments=arguments_dict,
+        type="function",
+        parse_error=error,
+    )

inspect_ai/model/_conversation.py CHANGED Viewed

@@ -1,7 +1,6 @@
 from rich.console import RenderableType
 from rich.text import Text
-from inspect_ai._util.constants import NO_CONTENT
 from inspect_ai._util.rich import lines_display
 from inspect_ai._util.transcript import transcript_markdown
 from inspect_ai.util._conversation import conversation_panel
@@ -41,9 +40,7 @@ def conversation_assistant_message(
         # start with assistant content
         content: list[RenderableType] = (
-            [transcript_markdown(message.text, escape=True)]
-            if message.text and message.text != NO_CONTENT
-            else []
+            [transcript_markdown(message.text, escape=True)] if message.text else []
         )
         # print tool calls

inspect_ai/model/_generate_config.py CHANGED Viewed

@@ -34,7 +34,7 @@ class GenerateConfigArgs(TypedDict, total=False):
     """Sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence."""
     best_of: int | None
-    """Generates best_of completions server-side and returns the 'best' (the one with the highest log probability per token). OpenAI only."""
+    """Generates best_of completions server-side and returns the 'best' (the one with the highest log probability per token). vLLM only."""
     frequency_penalty: float | None
     """Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. OpenAI, Google, Grok, Groq, and vLLM only."""
@@ -48,9 +48,6 @@ class GenerateConfigArgs(TypedDict, total=False):
     seed: int | None
     """Random seed. OpenAI, Google, Mistral, Groq, HuggingFace, and vLLM only."""
-    suffix: str | None
-    """The suffix that comes after a completion of inserted text. OpenAI only."""
     top_k: int | None
     """Randomly sample the next word from the top_k most likely next words. Anthropic, Google, and HuggingFace only."""
@@ -107,7 +104,7 @@ class GenerateConfig(BaseModel):
     """Sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence."""
     best_of: int | None = Field(default=None)
-    """Generates best_of completions server-side and returns the 'best' (the one with the highest log probability per token). OpenAI and vLLM only."""
+    """Generates best_of completions server-side and returns the 'best' (the one with the highest log probability per token). vLLM only."""
     frequency_penalty: float | None = Field(default=None)
     """Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. OpenAI, Google, Grok, Groq, and vLLM only."""
@@ -121,9 +118,6 @@ class GenerateConfig(BaseModel):
     seed: int | None = Field(default=None)
     """Random seed. OpenAI, Google, Mistral, Groq, HuggingFace, and vLLM only."""
-    suffix: str | None = Field(default=None)
-    """The suffix that comes after a completion of inserted text. OpenAI only."""
     top_k: int | None = Field(default=None)
     """Randomly sample the next word from the top_k most likely next words. Anthropic, Google, HuggingFace, and vLLM only."""

inspect_ai/model/_model_output.py CHANGED Viewed

@@ -214,3 +214,18 @@ class ModelOutput(BaseModel):
                 )
             ],
         )
+def as_stop_reason(reason: str | None) -> StopReason:
+    """Encode common reason strings into standard StopReason."""
+    match reason:
+        case "stop" | "eos":
+            return "stop"
+        case "length":
+            return "max_tokens"
+        case "tool_calls" | "function_call":
+            return "tool_calls"
+        case "content_filter" | "model_length" | "max_tokens":
+            return reason
+        case _:
+            return "unknown"

inspect-ai 0.3.59__py3-none-any.whl → 0.3.60__py3-none-any.whl

inspect-ai 0.3.59__py3-none-any.whl → 0.3.60__py3-none-any.whl