inspect-ai 0.3.65__py3-none-any.whl → 0.3.67__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. inspect_ai/_display/core/config.py +4 -0
  2. inspect_ai/_display/textual/app.py +13 -5
  3. inspect_ai/_display/textual/widgets/footer.py +2 -2
  4. inspect_ai/_display/textual/widgets/sandbox.py +1 -1
  5. inspect_ai/_display/textual/widgets/task_detail.py +7 -5
  6. inspect_ai/_display/textual/widgets/tasks.py +8 -6
  7. inspect_ai/_display/textual/widgets/transcript.py +1 -1
  8. inspect_ai/_eval/task/run.py +5 -3
  9. inspect_ai/_eval/task/task.py +9 -1
  10. inspect_ai/_util/format.py +58 -0
  11. inspect_ai/_view/www/dist/assets/index.css +29 -9
  12. inspect_ai/_view/www/dist/assets/index.js +368 -304
  13. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +1 -1
  14. inspect_ai/_view/www/src/samples/sample-tools/filters.ts +41 -20
  15. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +2 -1
  16. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/completions.ts +28 -6
  17. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/language.ts +5 -0
  18. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +1 -3
  19. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +31 -16
  20. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +4 -1
  21. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.module.css +1 -0
  22. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +2 -2
  23. inspect_ai/model/_model.py +89 -2
  24. inspect_ai/model/_providers/anthropic.py +4 -0
  25. inspect_ai/model/_providers/azureai.py +5 -0
  26. inspect_ai/model/_providers/bedrock.py +5 -0
  27. inspect_ai/model/_providers/cloudflare.py +4 -0
  28. inspect_ai/model/_providers/goodfire.py +5 -0
  29. inspect_ai/model/_providers/google.py +16 -3
  30. inspect_ai/model/_providers/groq.py +4 -0
  31. inspect_ai/model/_providers/hf.py +7 -0
  32. inspect_ai/model/_providers/mistral.py +4 -0
  33. inspect_ai/model/_providers/openai.py +4 -0
  34. inspect_ai/model/_providers/vertex.py +5 -0
  35. inspect_ai/model/_providers/vllm.py +7 -0
  36. inspect_ai/solver/__init__.py +8 -1
  37. inspect_ai/solver/_human_agent/panel.py +11 -5
  38. inspect_ai/solver/_prompt.py +38 -5
  39. inspect_ai/util/_sandbox/docker/config.py +4 -1
  40. inspect_ai/util/_sandbox/docker/util.py +2 -1
  41. {inspect_ai-0.3.65.dist-info → inspect_ai-0.3.67.dist-info}/METADATA +3 -2
  42. {inspect_ai-0.3.65.dist-info → inspect_ai-0.3.67.dist-info}/RECORD +46 -46
  43. {inspect_ai-0.3.65.dist-info → inspect_ai-0.3.67.dist-info}/LICENSE +0 -0
  44. {inspect_ai-0.3.65.dist-info → inspect_ai-0.3.67.dist-info}/WHEEL +0 -0
  45. {inspect_ai-0.3.65.dist-info → inspect_ai-0.3.67.dist-info}/entry_points.txt +0 -0
  46. {inspect_ai-0.3.65.dist-info → inspect_ai-0.3.67.dist-info}/top_level.txt +0 -0
inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx
@@ -1,7 +1,7 @@
 import { ApplicationIcons } from "../../appearance/icons";

 import clsx from "clsx";
-import styles from "./SampleErrorView.module.css";
+import styles from "./FlatSampleErrorView.module.css";
 import { errorType } from "./error";

 interface FlatSampleErrorViewProps {
inspect_ai/_view/www/src/samples/sample-tools/filters.ts
@@ -35,7 +35,7 @@ const coerceValue = (value: unknown, descriptor: ScoreDescriptor): unknown => {

 // Whether a particular value is filter-able
 const isFilteringSupportedForValue = (value: unknown): boolean =>
-  ["string", "number", "boolean"].includes(typeof value);
+  ["string", "number", "boolean"].includes(typeof value) || value === null;

 /**
  * Returns the names of scores that are not allowed to be used as short names in
@@ -56,20 +56,26 @@ const bannedShortScoreNames = (scores: ScoreLabel[]): Set<string> => {
   return banned;
 };

+// Pseudo-variables added to all filter expressions. These are not needed in most cases.
+// Normally one could check a boolean value `foo` by simply typing `foo` or `not foo`.
+// However, some evals use tristate values that can be true, false or null. This is where
+// these constants come in handy.
+const filterExpressionConstants: Record<string, unknown> = {
+  True: true,
+  False: false,
+  None: null,
+};
+
 /**
  * Generates a dictionary of variables that can be used in the filter expression.
  * High-level scorer metrics can be accessed by name directly.
  * Child metrics are accessed using dot notation (e.g. `scorer_name.score_name`) or
  * directly by name when it is unique.
- *
- * @param {import("../../samples/descriptor/samplesDescriptor").EvalDescriptor} evalDescriptor
- * @param {import("../../types/log").Scores1} sampleScores
- * @returns {Object<string, any>}
  */
 const scoreVariables = (
   evalDescriptor: EvalDescriptor,
   sampleScores: Scores1,
-) => {
+): Record<string, unknown> => {
   const bannedShortNames = bannedShortScoreNames(evalDescriptor.scores);
   const variables: Record<string, unknown> = {};

@@ -77,7 +83,7 @@ const scoreVariables = (
     variableName: string,
     scoreLabel: ScoreLabel,
     value: unknown,
-  ) => {
+  ): void => {
     const coercedValue = coerceValue(
       value,
       evalDescriptor.scoreDescriptor(scoreLabel),
@@ -101,6 +107,12 @@ const scoreVariables = (
   return variables;
 };

+const sampleVariables = (sample: SampleSummary): Record<string, unknown> => {
+  return {
+    has_error: !!sample.error,
+  };
+};
+
 /**
  * Generates a dictionary of variables that can be used in the filter expression.
  * High-level scorer metrics can be accessed by name directly.
@@ -115,11 +127,6 @@ export const scoreFilterItems = (
   const valueToString = (value: unknown) =>
     typeof value === "string" ? `"${value}"` : String(value);

-  /**
-   * @param {string | undefined} shortName
-   * @param {string | undefined} qualifiedName
-   * @param {import("../../types").ScoreLabel} scoreLabel
-   */
   const addScore = (
     scoreLabel: ScoreLabel,
     shortName?: string,
@@ -196,13 +203,33 @@ export const filterExpression = (
       : [sample.target];
     return targets.some((target) => target.match(new RegExp(regex, "i")));
   };
+  const errorContains = (regex: string): boolean => {
+    return !!sample.error?.match(new RegExp(regex, "i"));
+  };

   const extraFunctions = {
     input_contains: inputContains,
     target_contains: targetContains,
+    error_contains: errorContains,
+  };
+  const mySampleVariables = sampleVariables(sample);
+  const vars = {
+    ...mySampleVariables,
+    ...scoreVariables(evalDescriptor, sample.scores),
+  };
+  const resolveVariable = (name: string, get: (name: string) => any) => {
+    // Sample variables (like has_error) always exist.
+    if (name in mySampleVariables) {
+      return get(name);
+    }
+    // Score variables exist only if the sample completed successfully.
+    return sample.error ? undefined : get(name);
   };
-  const expression = compileExpression(filterValue, { extraFunctions });
-  const vars = scoreVariables(evalDescriptor, sample.scores);
+  const expression = compileExpression(filterValue, {
+    extraFunctions,
+    constants: filterExpressionConstants,
+    customProp: resolveVariable,
+  });
   const result = expression(vars);
   if (typeof result === "boolean") {
     return { matches: result, error: undefined };
@@ -263,12 +290,6 @@ export const filterExpression = (
   }
 };

-/**
- * @param {import("../../samples/descriptor/samplesDescriptor").EvalDescriptor} evalDescriptor
- * @param {import("../../api/types").SampleSummary[]} samples
- * @param {string} filterValue
- * @returns {}
- */
 export const filterSamples = (
   evalDescriptor: EvalDescriptor,
   samples: SampleSummary[],
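Note: taken together, the filters.ts changes make errored samples first-class in filter expressions. For example (the score name `accuracy` is hypothetical), `has_error or error_contains("timeout")` matches samples that failed, while `accuracy == None` distinguishes a score that was never produced from one that is false; the `True`/`False`/`None` constants and the `resolveVariable` fallback let such expressions evaluate cleanly instead of erroring on missing score variables.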
inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx
@@ -39,7 +39,8 @@ interface SampleFilterProps {
 const FILTER_TOOLTIP = `
 Filter samples by:
   • Scores
-  • Input and target regex search: input_contains, target_contains
+  • Samples with errors: has_error
+  • Input, target and error regex search: input_contains, target_contains, error_contains

 Supported expressions:
   • Arithmetic: +, -, *, /, mod, ^
inspect_ai/_view/www/src/samples/sample-tools/sample-filter/completions.ts
@@ -13,7 +13,12 @@ import {
   kScoreTypePassFail,
 } from "../../../constants";
 import { ScoreFilterItem } from "../filters";
-import { KEYWORDS, MATH_FUNCTIONS, SAMPLE_FUNCTIONS } from "./language";
+import {
+  KEYWORDS,
+  MATH_FUNCTIONS,
+  SAMPLE_FUNCTIONS,
+  SAMPLE_VARIABLES,
+} from "./language";
 import { Token, tokenize } from "./tokenize";

 interface CompletionOptions {
@@ -76,10 +81,20 @@ const makeSampleFunctionCompletion = ([label, info]: [
   boost: 0,
 });

+const makeSampleVariableCompletion = ([label, info]: [
+  string,
+  string,
+]): Completion => ({
+  label,
+  type: "variable",
+  info,
+  boost: 10,
+});
+
 const makeLiteralCompletion = (k: string): Completion => ({
   label: k,
   type: "text",
-  boost: 10,
+  boost: 20,
 });

 const makeCanonicalNameCompletion = (
@@ -89,14 +104,14 @@ const makeCanonicalNameCompletion = (
   label: item.canonicalName + (autoSpaceIf(item) ? " " : ""),
   type: "variable",
   info: item.tooltip,
-  boost: 20,
+  boost: 30,
 });

 const makeMemberAccessCompletion = (item: ScoreFilterItem): Completion => ({
   label: item.qualifiedName?.split(".")[1] || "",
   type: "variable",
   info: item.tooltip,
-  boost: 20,
+  boost: 40,
 });

 const getMemberScoreItems = (
@@ -130,6 +145,9 @@ export function getCompletions(
   const sampleFunctionCompletionItems = SAMPLE_FUNCTIONS.map(
     makeSampleFunctionCompletion,
   );
+  const sampleVariableCompletionItems = SAMPLE_VARIABLES.map(
+    makeSampleVariableCompletion,
+  );
   const variableCompletionItems = filterItems.map((item) =>
     makeCanonicalNameCompletion(item),
   );
@@ -138,6 +156,7 @@ export function getCompletions(
     ...keywordCompletionItems,
     ...mathFunctionCompletionItems,
     ...sampleFunctionCompletionItems,
+    ...sampleVariableCompletionItems,
     ...variableCompletionItems,
   ];

@@ -218,9 +237,11 @@ export function getCompletions(
     },
   };

-  const priorityLabels = new Set(priorityCompletions.map((c) => c.label));
+  const priorityLabels = new Set(
+    priorityCompletions.map((c) => c.label.trim()),
+  );
   const defaultCompletionsAdjusted = defaultCompletionItems
-    .filter((c) => !priorityLabels.has(c.label))
+    .filter((c) => !priorityLabels.has(c.label.trim()))
     .map((c) => ({ ...c, section: miscSection }));

   return {
@@ -240,6 +261,7 @@ export function getCompletions(
         completingAtEnd && item.scoreType !== kScoreTypeBoolean,
       }),
     ),
+    ...sampleVariableCompletionItems,
     ...sampleFunctionCompletionItems,
   ]);
inspect_ai/_view/www/src/samples/sample-tools/sample-filter/language.ts
@@ -13,7 +13,12 @@ export const MATH_FUNCTIONS: [string, string][] = [
   ["log10", "Base 10 logarithm"],
 ];

+export const SAMPLE_VARIABLES: [string, string][] = [
+  ["has_error", "Checks if the sample has an error"],
+];
+
 export const SAMPLE_FUNCTIONS: [string, string][] = [
   ["input_contains", "Checks if input contains a regular expression"],
   ["target_contains", "Checks if target contains a regular expression"],
+  ["error_contains", "Checks if error contains a regular expression"],
 ];
inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx
@@ -24,9 +24,7 @@ export const LoggerEventView: React.FC<LoggerEventViewProps> = ({
       icon={ApplicationIcons.logging[event.message.level.toLowerCase()]}
     >
       <div className={clsx("text-size-base", styles.grid)}>
-        <div className={clsx("text-size-smaller")}>
-          ${event.message.message}
-        </div>
+        <div className={clsx("text-size-smaller")}>{event.message.message}</div>
         <div className={clsx("text-size-smaller", "text-style-secondary")}>
           {event.message.filename}:{event.message.lineno}
         </div>
inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx
@@ -30,19 +30,6 @@ export const SubtaskEventView: React.FC<SubtaskEventViewProps> = ({
   className,
 }) => {
   // Render Forks specially
-
-  const transcript =
-    event.events.length > 0 ? (
-      <TranscriptView
-        id={`${id}-subtask`}
-        data-name="Transcript"
-        events={event.events}
-        depth={depth + 1}
-      />
-    ) : (
-      ""
-    );
-
   const body =
     event.type === "fork" ? (
       <div title="Summary" className={clsx(styles.summary)}>
@@ -51,7 +38,16 @@ export const SubtaskEventView: React.FC<SubtaskEventViewProps> = ({
         <Rendered values={event.input} />
       </div>
       <div className={clsx("text-style-label")}>Transcript</div>
-      {transcript}
+      {event.events.length > 0 ? (
+        <TranscriptView
+          id={`${id}-subtask`}
+          data-name="Transcript"
+          events={event.events}
+          depth={depth + 1}
+        />
+      ) : (
+        <None />
+      )}
     </div>
   ) : (
     <Fragment>
@@ -60,7 +56,14 @@ export const SubtaskEventView: React.FC<SubtaskEventViewProps> = ({
         input={event.input}
         result={event.result}
       />
-      {transcript}
+      {event.events.length > 0 ? (
+        <TranscriptView
+          id={`${id}-subtask`}
+          data-name="Transcript"
+          events={event.events}
+          depth={depth + 1}
+        />
+      ) : undefined}
     </Fragment>
   );

@@ -126,8 +129,20 @@ const Rendered: React.FC<RenderedProps> = ({ values }) => {
       return <Rendered values={val} />;
     });
   } else if (values && typeof values === "object") {
-    return <MetaDataView entries={values as Record<string, unknown>} />;
+    if (Object.keys(values).length === 0) {
+      return <None />;
+    } else {
+      return <MetaDataView entries={values as Record<string, unknown>} />;
+    }
   } else {
     return values;
   }
 };
+
+const None: React.FC = () => {
+  return (
+    <span className={clsx("text-size-small", "text-style-secondary")}>
+      [None]
+    </span>
+  );
+};
inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx
@@ -387,7 +387,10 @@ const fixupEventStream = (events: Events) => {
   });
   const initEvent = events[initEventIndex];

-  const fixedUp = [...events];
+  // Filter pending events
+  const finalEvents = events.filter((e) => !e.pending);
+
+  const fixedUp = [...finalEvents];
   if (initEvent) {
     fixedUp.splice(initEventIndex, 0, {
       timestamp: initEvent.timestamp,
inspect_ai/_view/www/src/workspace/navbar/StatusPanel.module.css
@@ -5,6 +5,7 @@
   font-size: var(--inspect-font-size-smaller);
   display: grid;
   grid-template-columns: auto auto;
+  justify-content: end;
 }

 .statusIcon {
inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx
@@ -51,9 +51,9 @@ const StatusPanel: React.FC<StatusPanelProps> = ({
     <div className={styles.statusPanel}>
       <i className={clsx(icon, styles.statusIcon)} style={{}} />
       <div>
-        <div>${status}</div>
+        <div>{status}</div>
         <div>
-          (${sampleCount} ${sampleCount === 1 ? "sample" : "samples"})
+          ({sampleCount} {sampleCount === 1 ? "sample" : "samples"})
        </div>
      </div>
    </div>
inspect_ai/model/_model.py
@@ -7,8 +7,10 @@ import os
 import time
 from contextvars import ContextVar
 from copy import deepcopy
+from types import TracebackType
 from typing import Any, Callable, Literal, Type, cast

+from pydantic_core import to_jsonable_python
 from tenacity import (
     retry,
     retry_if_exception,
@@ -109,6 +111,10 @@ class ModelAPI(abc.ABC):
         # set any explicitly specified api key
         self.api_key = api_key

+    async def close(self) -> None:
+        """Close method for closing any client allocated for the model."""
+        pass
+
     @abc.abstractmethod
     async def generate(
         self,
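The provider hunks later in this diff implement the new `close()` hook in one of two ways: awaiting a long-lived client's `close()`/`aclose()`, or a no-op when a client is created per `generate()` call. A minimal sketch of the first pattern (the provider class and its client are illustrative, not from this diff):

```python
from typing import Any

import httpx
from typing_extensions import override

from inspect_ai.model import ModelAPI


class ExampleAPI(ModelAPI):  # hypothetical provider (generate() omitted)
    def __init__(self, model_name: str, **kwargs: Any) -> None:
        super().__init__(model_name=model_name, **kwargs)
        # long-lived client allocated once for the lifetime of the model
        self.client = httpx.AsyncClient()

    @override
    async def close(self) -> None:
        # invoked via Model.__aexit__ when the async context manager exits
        await self.client.aclose()
```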
@@ -178,7 +184,17 @@


 class Model:
-    """Model interface."""
+    """Model interface.
+
+    Use `get_model()` to get an instance of a model. Model provides an
+    async context manager for closing the connection to it after use.
+    For example:
+
+    ```python
+    async with get_model("openai/gpt-4o") as model:
+        response = await model.generate("Say hello")
+    ```
+    """

     api: ModelAPI
     """Model API."""
@@ -196,10 +212,28 @@ class Model:
         self.api = api
         self.config = config

+        # state indicating whether our lifetime is bound by a context manager
+        self._context_bound = False
+        self._closed = False
+
         # if using the Model API standalone in a notebook this will
         # get hit before score() or eval() so we activate nest_asyncio
         platform_init()

+    async def __aenter__(self: "Model") -> "Model":
+        self._context_bound = True
+        return self
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        if not self._closed:
+            await self.api.close()
+            self._closed = True
+
     @property
     def name(self) -> str:
         """Model name."""
@@ -598,10 +632,27 @@ def get_model(
     config: GenerateConfig = GenerateConfig(),
     base_url: str | None = None,
     api_key: str | None = None,
+    memoize: bool = True,
     **model_args: Any,
 ) -> Model:
     """Get an instance of a model.

+    Calls to get_model() are memoized (i.e. a call with the same arguments
+    will return an existing instance of the model rather than creating a
+    new one). You can disable this with `memoize=False`.
+
+    If you prefer to immediately close models after use (as well as
+    prevent caching) you can employ the async context manager built in
+    to the `Model` class. For example:
+
+    ```python
+    async with get_model("openai/gpt-4o") as model:
+        response = await model.generate("Say hello")
+    ```
+
+    In this case, the model client will be closed at the end of the
+    context manager and will not be available in the get_model() cache.
+
     Args:
        model: Model specification.
          If `Model` is passed it is returned unmodified,
@@ -611,6 +662,8 @@ def get_model(
        config: Configuration for model.
        base_url: Optional. Alternate base URL for model.
        api_key: Optional. API key for model.
+       memoize: Use/store a cached version of the model based on
+          the parameters to `get_model()`
        **model_args: Additional args to
          pass to model constructor.

@@ -637,6 +690,23 @@ def get_model(
     else:
         raise ValueError("No model specified (and no INSPECT_EVAL_MODEL defined)")

+    # see if we can return a memoized model instance
+    # (exclude mockllm since custom_outputs is an infinite generator)
+    model_cache_key: str = ""  # for mypy below
+    if model.startswith("mockllm/"):
+        memoize = False
+    if memoize:
+        model_cache_key = (
+            model
+            + config.model_dump_json(exclude_none=True)
+            + str(base_url)
+            + str(api_key)
+            + str(to_jsonable_python(model_args, fallback=lambda _: None))
+        )
+        cached = cached_model(model_cache_key)
+        if cached is not None:
+            return cached
+
     # split model into api name and model name if necessary
     api_name = None
     parts = model.split("/")
@@ -667,13 +737,30 @@ def get_model(
             config=config,
             **model_args,
         )
-        return Model(modelapi_instance, config)
+        m = Model(modelapi_instance, config)
+        if memoize:
+            _models[model_cache_key] = m
+        return m

     else:
         from_api = f" from {api_name}" if api_name else ""
         raise ValueError(f"Model name {model}{from_api} not recognized.")


+# cache for memoization of get_model
+_models: dict[str, Model] = {}
+
+
+def cached_model(key: str) -> Model | None:
+    # clean out context bound models before accessing the cache
+    for k in list(_models.keys()):
+        if _models[k]._context_bound:
+            del _models[k]
+
+    # read from the cache
+    return _models.get(key, None)
+
+
 def resolve_models(
     model: str | Model | list[str] | list[Model] | None,
     model_base_url: str | None = None,
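A quick sketch of the memoization semantics introduced above (the model name is illustrative):

```python
from inspect_ai.model import get_model

# identical arguments return the same cached Model instance
m1 = get_model("openai/gpt-4o")
m2 = get_model("openai/gpt-4o")
assert m1 is m2

# memoize=False always constructs a fresh, uncached instance
m3 = get_model("openai/gpt-4o", memoize=False)
assert m3 is not m1


# entering the async context manager marks the model as context-bound,
# so cached_model() evicts it from the cache on the next lookup
async def main() -> None:
    async with get_model("openai/gpt-4o") as model:
        await model.generate("Say hello")
```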
inspect_ai/model/_providers/anthropic.py
@@ -150,6 +150,10 @@ class AnthropicAPI(ModelAPI):
             **model_args,
         )

+    @override
+    async def close(self) -> None:
+        await self.client.close()
+
     def is_bedrock(self) -> bool:
         return self.service == "bedrock"

inspect_ai/model/_providers/azureai.py
@@ -124,6 +124,11 @@ class AzureAIAPI(ModelAPI):
         self.endpoint_url = endpoint_url
         self.model_args = model_args

+    @override
+    async def close(self) -> None:
+        # client is created/destroyed each time in generate()
+        pass
+
     async def generate(
         self,
         input: list[ChatMessage],
inspect_ai/model/_providers/bedrock.py
@@ -259,6 +259,11 @@ class BedrockAPI(ModelAPI):
         except ImportError:
             raise pip_dependency_error("Bedrock API", ["aioboto3"])

+    @override
+    async def close(self) -> None:
+        # client is created/destroyed each time in generate()
+        pass
+
     @override
     def connection_key(self) -> str:
         return self.model_name
inspect_ai/model/_providers/cloudflare.py
@@ -56,6 +56,10 @@ class CloudFlareAPI(ModelAPI):
         )
         self.model_args = model_args

+    @override
+    async def close(self) -> None:
+        await self.client.aclose()
+
     async def generate(
         self,
         input: list[ChatMessage],
inspect_ai/model/_providers/goodfire.py
@@ -111,6 +111,11 @@ class GoodfireAPI(ModelAPI):
         # Initialize variant directly with model name
         self.variant = Variant(self.model_name)  # type: ignore

+    @override
+    async def close(self) -> None:
+        # httpx.AsyncClient is created on each generate()
+        pass
+
     def _to_goodfire_message(self, message: ChatMessage) -> GoodfireChatMessage:
         """Convert an Inspect message to a Goodfire message format.

inspect_ai/model/_providers/google.py
@@ -134,6 +134,11 @@ class GoogleAPI(ModelAPI):
         # create model
         self.model = GenerativeModel(self.model_name)

+    @override
+    async def close(self) -> None:
+        # GenerativeModel uses a cached/shared client so there is no 'close'
+        pass
+
     async def generate(
         self,
         input: list[ChatMessage],
@@ -393,12 +398,12 @@ def prepend_system_messages(
 ) -> None:
     # create system_parts
     system_parts: list[PartType] = [
-        Part(text=message.content) for message in system_messages
+        Part(text=message.text) for message in system_messages
     ]

     # we want the system messages to be prepended to the first user message
     # (if there is no first user message then prepend one)
-    if messages[0].get("role") == "user":
+    if len(messages) > 0 and messages[0].get("role") == "user":
         messages[0]["parts"] = system_parts + messages[0].get("parts", [])
     else:
         messages.insert(0, ContentDict(role="user", parts=system_parts))
@@ -561,7 +566,15 @@ def completion_choices_from_candidates(
             completion_choice_from_candidate(candidate) for candidate in candidates_list
         ]
     else:
-        return []
+        return [
+            ChatCompletionChoice(
+                message=ChatMessageAssistant(
+                    content="I was unable to generate a response.",
+                    source="generate",
+                ),
+                stop_reason="unknown",
+            )
+        ]


 # google doesn't export FinishReason (it's in a sub-namespace with a beta
inspect_ai/model/_providers/groq.py
@@ -87,6 +87,10 @@ class GroqAPI(ModelAPI):
             http_client=httpx.AsyncClient(limits=httpx.Limits(max_connections=None)),
         )

+    @override
+    async def close(self) -> None:
+        await self.client.close()
+
     async def generate(
         self,
         input: list[ChatMessage],
inspect_ai/model/_providers/hf.py
@@ -1,6 +1,7 @@
 import asyncio
 import copy
 import functools
+import gc
 import json
 import os
 from dataclasses import dataclass
@@ -112,6 +113,12 @@ class HuggingFaceAPI(ModelAPI):
             self.tokenizer.pad_token = self.tokenizer.eos_token
         self.tokenizer.padding_side = "left"

+    @override
+    async def close(self) -> None:
+        self.model = None
+        self.tokenizer = None
+        gc.collect()
+
     async def generate(
         self,
         input: list[ChatMessage],
inspect_ai/model/_providers/mistral.py
@@ -118,6 +118,10 @@ class MistralAPI(ModelAPI):
             **model_args,
         )

+    @override
+    async def close(self) -> None:
+        await self.client.sdk_configuration.async_client.aclose()
+
     async def generate(
         self,
         input: list[ChatMessage],