PyPI - inspect-ai - Versions diffs - 0.3.74__py3-none-any.whl → 0.3.76__py3-none-any.whl - Mend

inspect-ai 0.3.74__py3-none-any.whl → 0.3.76__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (115) hide show

inspect_ai/__init__.py +3 -2
inspect_ai/_cli/cache.py +1 -1
inspect_ai/_cli/common.py +15 -0
inspect_ai/_cli/eval.py +4 -5
inspect_ai/_cli/log.py +1 -1
inspect_ai/_cli/sandbox.py +1 -1
inspect_ai/_cli/trace.py +1 -1
inspect_ai/_cli/view.py +1 -1
inspect_ai/_display/core/config.py +3 -1
inspect_ai/_eval/eval.py +55 -61
inspect_ai/_eval/evalset.py +64 -154
inspect_ai/_eval/loader.py +27 -54
inspect_ai/_eval/registry.py +4 -15
inspect_ai/_eval/run.py +7 -4
inspect_ai/_eval/task/__init__.py +8 -2
inspect_ai/_eval/task/log.py +9 -1
inspect_ai/_eval/task/resolved.py +35 -0
inspect_ai/_eval/task/run.py +4 -0
inspect_ai/_eval/task/task.py +50 -69
inspect_ai/_eval/task/tasks.py +30 -0
inspect_ai/_util/constants.py +3 -0
inspect_ai/_util/dotenv.py +17 -0
inspect_ai/_util/logger.py +3 -0
inspect_ai/_util/registry.py +43 -2
inspect_ai/_view/server.py +28 -10
inspect_ai/_view/www/dist/assets/index.css +32 -19
inspect_ai/_view/www/dist/assets/index.js +17682 -29989
inspect_ai/_view/www/log-schema.json +79 -9
inspect_ai/_view/www/package.json +2 -2
inspect_ai/_view/www/src/appearance/styles.ts +6 -5
inspect_ai/_view/www/src/components/AnsiDisplay.tsx +2 -2
inspect_ai/_view/www/src/constants.ts +3 -0
inspect_ai/_view/www/src/logfile/remoteZipFile.ts +141 -20
inspect_ai/_view/www/src/plan/PlanDetailView.tsx +2 -1
inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +1 -1
inspect_ai/_view/www/src/samples/chat/tools/tool.ts +7 -5
inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +1 -1
inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +2 -2
inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +1 -0
inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +3 -1
inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +1 -1
inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +5 -2
inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +2 -2
inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +5 -1
inspect_ai/_view/www/src/types/log.d.ts +11 -5
inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +17 -12
inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +2 -1
inspect_ai/_view/www/yarn.lock +12 -5
inspect_ai/log/_log.py +10 -1
inspect_ai/log/_recorders/eval.py +27 -8
inspect_ai/log/_recorders/json.py +10 -2
inspect_ai/log/_transcript.py +13 -4
inspect_ai/model/_call_tools.py +13 -4
inspect_ai/model/_chat_message.py +15 -1
inspect_ai/model/_model.py +30 -12
inspect_ai/model/_model_output.py +6 -1
inspect_ai/model/_openai.py +11 -6
inspect_ai/model/_providers/anthropic.py +167 -77
inspect_ai/model/_providers/google.py +6 -2
inspect_ai/model/_providers/none.py +31 -0
inspect_ai/model/_providers/openai.py +11 -8
inspect_ai/model/_providers/providers.py +7 -0
inspect_ai/model/_providers/vertex.py +5 -2
inspect_ai/solver/_bridge/bridge.py +1 -1
inspect_ai/solver/_chain.py +7 -6
inspect_ai/tool/__init__.py +4 -0
inspect_ai/tool/_tool_call.py +5 -2
inspect_ai/tool/_tool_support_helpers.py +200 -0
inspect_ai/tool/_tools/_bash_session.py +119 -0
inspect_ai/tool/_tools/_computer/_computer.py +1 -1
inspect_ai/tool/_tools/_text_editor.py +121 -0
inspect_ai/tool/_tools/_web_browser/_back_compat.py +150 -0
inspect_ai/tool/_tools/_web_browser/_web_browser.py +75 -130
inspect_ai/tool/_tools/_web_search.py +2 -2
inspect_ai/util/_json.py +28 -0
inspect_ai/util/_sandbox/context.py +18 -8
inspect_ai/util/_sandbox/docker/config.py +1 -1
inspect_ai/util/_sandbox/docker/internal.py +3 -3
inspect_ai/util/_sandbox/environment.py +17 -2
{inspect_ai-0.3.74.dist-info → inspect_ai-0.3.76.dist-info}/METADATA +8 -5
{inspect_ai-0.3.74.dist-info → inspect_ai-0.3.76.dist-info}/RECORD +85 -108
{inspect_ai-0.3.74.dist-info → inspect_ai-0.3.76.dist-info}/WHEEL +1 -1
inspect_ai/tool/_tools/_web_browser/_resources/.pylintrc +0 -8
inspect_ai/tool/_tools/_web_browser/_resources/.vscode/launch.json +0 -24
inspect_ai/tool/_tools/_web_browser/_resources/.vscode/settings.json +0 -25
inspect_ai/tool/_tools/_web_browser/_resources/Dockerfile +0 -22
inspect_ai/tool/_tools/_web_browser/_resources/README.md +0 -63
inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree.py +0 -71
inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree_node.py +0 -323
inspect_ai/tool/_tools/_web_browser/_resources/cdp/__init__.py +0 -5
inspect_ai/tool/_tools/_web_browser/_resources/cdp/a11y.py +0 -279
inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom.py +0 -9
inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom_snapshot.py +0 -293
inspect_ai/tool/_tools/_web_browser/_resources/cdp/page.py +0 -94
inspect_ai/tool/_tools/_web_browser/_resources/constants.py +0 -2
inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.svg +0 -2
inspect_ai/tool/_tools/_web_browser/_resources/mock_environment.py +0 -45
inspect_ai/tool/_tools/_web_browser/_resources/playwright_browser.py +0 -50
inspect_ai/tool/_tools/_web_browser/_resources/playwright_crawler.py +0 -48
inspect_ai/tool/_tools/_web_browser/_resources/playwright_page_crawler.py +0 -280
inspect_ai/tool/_tools/_web_browser/_resources/pyproject.toml +0 -65
inspect_ai/tool/_tools/_web_browser/_resources/rectangle.py +0 -64
inspect_ai/tool/_tools/_web_browser/_resources/rpc_client_helpers.py +0 -146
inspect_ai/tool/_tools/_web_browser/_resources/scale_factor.py +0 -64
inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_tree_node.py +0 -180
inspect_ai/tool/_tools/_web_browser/_resources/test_playwright_crawler.py +0 -99
inspect_ai/tool/_tools/_web_browser/_resources/test_rectangle.py +0 -15
inspect_ai/tool/_tools/_web_browser/_resources/test_web_client.py +0 -44
inspect_ai/tool/_tools/_web_browser/_resources/web_browser_rpc_types.py +0 -39
inspect_ai/tool/_tools/_web_browser/_resources/web_client.py +0 -214
inspect_ai/tool/_tools/_web_browser/_resources/web_client_new_session.py +0 -35
inspect_ai/tool/_tools/_web_browser/_resources/web_server.py +0 -192
{inspect_ai-0.3.74.dist-info → inspect_ai-0.3.76.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.74.dist-info → inspect_ai-0.3.76.dist-info/licenses}/LICENSE +0 -0
{inspect_ai-0.3.74.dist-info → inspect_ai-0.3.76.dist-info}/top_level.txt +0 -0

inspect_ai/_view/www/src/types/log.d.ts CHANGED Viewed

@@ -148,7 +148,7 @@ export type Input =
       | ChatMessageAssistant
       | ChatMessageTool
     )[];
-export type Id1 = string;
+export type Id1 = string | null;
 export type Content =
   | string
   | (
@@ -175,7 +175,7 @@ export type Video = string;
 export type Format1 = "mp4" | "mpeg" | "mov";
 export type Source = ("input" | "generate") | null;
 export type Role = "system";
-export type Id2 = string;
+export type Id2 = string | null;
 export type Content1 =
   | string
   | (
@@ -188,7 +188,7 @@ export type Content1 =
 export type Source1 = ("input" | "generate") | null;
 export type Role1 = "user";
 export type ToolCallId = string[] | null;
-export type Id3 = string;
+export type Id3 = string | null;
 export type Content2 =
   | string
   | (
@@ -203,12 +203,13 @@ export type Role2 = "assistant";
 export type ToolCalls = ToolCall[] | null;
 export type Id4 = string;
 export type Function = string;
-export type Type8 = "function";
+export type Type8 = string;
+export type InternalName = string | null;
 export type ParseError = string | null;
 export type Title = string | null;
 export type Format2 = "text" | "markdown";
 export type Content3 = string;
-export type Id5 = string;
+export type Id5 = string | null;
 export type Content4 =
   | string
   | (
@@ -222,6 +223,7 @@ export type Source3 = ("input" | "generate") | null;
 export type Role3 = "tool";
 export type ToolCallId1 = string | null;
 export type Function1 = string | null;
+export type InternalName1 = string | null;
 export type Type9 =
   | "parsing"
   | "timeout"
@@ -369,6 +371,7 @@ export type Event6 = "tool";
 export type Type12 = "function";
 export type Id7 = string;
 export type Function2 = string;
+export type InternalName2 = string | null;
 export type Result1 =
   | string
   | number
@@ -911,6 +914,7 @@ export interface ToolCall {
   function: Function;
   arguments: Arguments;
   type: Type8;
+  internal_name: InternalName;
   parse_error: ParseError;
   view: ToolCallContent | null;
 }
@@ -933,6 +937,7 @@ export interface ChatMessageTool {
   role: Role3;
   tool_call_id: ToolCallId1;
   function: Function1;
+  internal_name: InternalName1;
   error: ToolCallError | null;
 }
 export interface ToolCallError {
@@ -1201,6 +1206,7 @@ export interface ToolEvent {
   id: Id7;
   function: Function2;
   arguments: Arguments1;
+  internal_name: InternalName2;
   view: ToolCallContent | null;
   result: Result1;
   truncated: Truncated;

inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx CHANGED Viewed

@@ -3,6 +3,7 @@ import { FC, useCallback } from "react";
 import { SampleSummary } from "../../api/types";
 import { ApplicationIcons } from "../../appearance/icons";
 import { CopyButton } from "../../components/CopyButton";
+import { kModelNone } from "../../constants";
 import { EvalResults, EvalSpec, Status } from "../../types/log";
 import { filename } from "../../utils/path";
 import styles from "./PrimaryBar.module.css";
@@ -71,18 +72,22 @@ export const PrimaryBar: FC<PrimaryBarProps> = ({
             >
               {evalSpec?.task}
             </div>
-            <div
-              id="task-model"
-              className={clsx(
-                "task-model",
-                "text-truncate",
-                styles.taskModel,
-                "text-size-base",
-              )}
-              title={evalSpec?.model}
-            >
-              {evalSpec?.model}
-            </div>
+            {evalSpec?.model && evalSpec.model !== kModelNone ? (
+              <div
+                id="task-model"
+                className={clsx(
+                  "task-model",
+                  "text-truncate",
+                  styles.taskModel,
+                  "text-size-base",
+                )}
+                title={evalSpec?.model}
+              >
+                {evalSpec?.model}
+              </div>
+            ) : (
+              ""
+            )}
           </div>
           <div className={clsx("text-size-small", styles.secondaryContainer)}>
             <div className={clsx("navbar-secondary-text", "text-truncate")}>

inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx CHANGED Viewed

@@ -1,6 +1,7 @@
 import clsx from "clsx";
 import { FC, Fragment } from "react";
 import { EvalLogHeader } from "../../api/types";
+import { kModelNone } from "../../constants";
 import { EvalStatus } from "./EvalStatus";
 import styles from "./SidebarLogEntry.module.css";
@@ -51,7 +52,7 @@ export const SidebarLogEntry: FC<SidebarLogEntryProps> = ({
           </div>
           <small className={clsx("mb-1", "text-size-small")}>{timeStr}</small>
-          {model ? (
+          {model && model !== kModelNone ? (
             <div>
               <small className={clsx("mb-1", "text-size-small")}>{model}</small>
             </div>

inspect_ai/_view/www/yarn.lock CHANGED Viewed

@@ -220,13 +220,20 @@
     "@codemirror/view" "^6.0.0"
     crelt "^1.0.5"
-"@codemirror/state@^6.0.0", "@codemirror/state@^6.4.0", "@codemirror/state@^6.5.0", "@codemirror/state@^6.5.1":
+"@codemirror/state@^6.0.0", "@codemirror/state@^6.4.0", "@codemirror/state@^6.5.0":
   version "6.5.1"
   resolved "https://registry.yarnpkg.com/@codemirror/state/-/state-6.5.1.tgz#e5c0599f7b43cf03f19e05861317df5425c07904"
   integrity sha512-3rA9lcwciEB47ZevqvD8qgbzhM9qMb8vCcQCNmDfVRPQG4JT9mSb0Jg8H7YjKGGQcFnLN323fj9jdnG59Kx6bg==
   dependencies:
     "@marijn/find-cluster-break" "^1.0.0"
+"@codemirror/state@^6.5.2":
+  version "6.5.2"
+  resolved "https://registry.yarnpkg.com/@codemirror/state/-/state-6.5.2.tgz#8eca3a64212a83367dc85475b7d78d5c9b7076c6"
+  integrity sha512-FVqsPqtPWKVVL3dPSxy8wEF/ymIEuVzF1PK3VbUgrxXpJUSHQWWZz4JMToquRxnkw+36LTamCZG2iua2Ptq0fA==
+  dependencies:
+    "@marijn/find-cluster-break" "^1.0.0"
 "@codemirror/view@^6.0.0", "@codemirror/view@^6.17.0", "@codemirror/view@^6.23.0", "@codemirror/view@^6.27.0", "@codemirror/view@^6.35.0":
   version "6.36.2"
   resolved "https://registry.yarnpkg.com/@codemirror/view/-/view-6.36.2.tgz#aeb644e161440734ac5a153bf6e5b4a4355047be"
@@ -862,10 +869,10 @@ argparse@^2.0.1:
   resolved "https://registry.yarnpkg.com/argparse/-/argparse-2.0.1.tgz#246f50f3ca78a3240f6c997e8a9bd1eac49e4b38"
   integrity sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==
-asciinema-player@^3.8.2:
-  version "3.8.2"
-  resolved "https://registry.yarnpkg.com/asciinema-player/-/asciinema-player-3.8.2.tgz#12fbf475ddaeee0051ace17532e5f003475f6dfa"
-  integrity sha512-Lgcnj9u/H6sRpGRX1my7Azcay6llLmB/GVkCGcDbPwdTVTisS1ir8SQ9jRWRvjlLUjpSJkN0euruvy3sLRM8tw==
+asciinema-player@^3.9.0:
+  version "3.9.0"
+  resolved "https://registry.yarnpkg.com/asciinema-player/-/asciinema-player-3.9.0.tgz#c60742f85978e861b878fc7eb6289a5622c298af"
+  integrity sha512-SXVFImVzeNr8ZUdNIHABGuzlbnGWTKy245AquAjODsAnv+Lp6vxjYGN0LfA8ns30tnx/ag/bMrTbLq13TpHE6w==
   dependencies:
     "@babel/runtime" "^7.21.0"
     solid-js "^1.3.0"

inspect_ai/log/_log.py CHANGED Viewed

@@ -215,7 +215,16 @@ class EvalSample(BaseModel):
         Returns:
           StoreModel: Instance of model_cls bound to sample store data.
         """
-        return model_cls(store=Store(self.store))
+        # un-namespace names for creation
+        data = {
+            k.replace(f"{model_cls.__name__}:", "", 1): v for k, v in self.store.items()
+        }
+        # since we are reading from the log provide a fully detached store
+        data["store"] = Store()
+        # create the model
+        return model_cls.model_validate(data)
     events: list[Event] = Field(default_factory=list)
     """Events that occurred during sample execution."""

inspect_ai/log/_recorders/eval.py CHANGED Viewed

@@ -10,7 +10,7 @@ from pydantic import BaseModel, Field
 from pydantic_core import to_json
 from typing_extensions import override
-from inspect_ai._util.constants import LOG_SCHEMA_VERSION
+from inspect_ai._util.constants import DESERIALIZING_CONTEXT, LOG_SCHEMA_VERSION
 from inspect_ai._util.content import (
     ContentAudio,
     ContentImage,
@@ -224,7 +224,9 @@ class EvalRecorder(FileRecorder):
             with ZipFile(z, mode="r") as zip:
                 try:
                     with zip.open(_sample_filename(id, epoch), "r") as f:
-                        return EvalSample(**json.load(f))
+                        return EvalSample.model_validate(
+                            json.load(f), context=DESERIALIZING_CONTEXT
+                        )
                 except KeyError:
                     raise IndexError(
                         f"Sample id {id} for epoch {epoch} not found in log {location}"
@@ -414,7 +416,10 @@ def _read_log(log: BinaryIO, location: str, header_only: bool = False) -> EvalLo
         if REDUCTIONS_JSON in zip.namelist():
             with zip.open(REDUCTIONS_JSON, "r") as f:
                 reductions = [
-                    EvalSampleReductions(**reduction) for reduction in json.load(f)
+                    EvalSampleReductions.model_validate(
+                        reduction, context=DESERIALIZING_CONTEXT
+                    )
+                    for reduction in json.load(f)
                 ]
                 if evalLog.results is not None:
                     evalLog.reductions = reductions
@@ -425,7 +430,11 @@ def _read_log(log: BinaryIO, location: str, header_only: bool = False) -> EvalLo
             for name in zip.namelist():
                 if name.startswith(f"{SAMPLES_DIR}/") and name.endswith(".json"):
                     with zip.open(name, "r") as f:
-                        samples.append(EvalSample(**json.load(f)))
+                        samples.append(
+                            EvalSample.model_validate(
+                                json.load(f), context=DESERIALIZING_CONTEXT
+                            ),
+                        )
             sort_samples(samples)
             evalLog.samples = samples
         return evalLog
@@ -452,7 +461,10 @@ def _read_all_summaries(zip: ZipFile, count: int) -> list[SampleSummary]:
     if SUMMARIES_JSON in zip.namelist():
         summaries_raw = _read_json(zip, SUMMARIES_JSON)
         if isinstance(summaries_raw, list):
-            return [SampleSummary(**value) for value in summaries_raw]
+            return [
+                SampleSummary.model_validate(value, context=DESERIALIZING_CONTEXT)
+                for value in summaries_raw
+            ]
         else:
             raise ValueError(
                 f"Expected a list of summaries when reading {SUMMARIES_JSON}"
@@ -464,7 +476,14 @@ def _read_all_summaries(zip: ZipFile, count: int) -> list[SampleSummary]:
             summary_path = _journal_summary_path(summary_file)
             summary = _read_json(zip, summary_path)
             if isinstance(summary, list):
-                summaries.extend([SampleSummary(**value) for value in summary])
+                summaries.extend(
+                    [
+                        SampleSummary.model_validate(
+                            value, context=DESERIALIZING_CONTEXT
+                        )
+                        for value in summary
+                    ]
+                )
             else:
                 raise ValueError(
                     f"Expected a list of summaries when reading {summary_file}"
@@ -476,12 +495,12 @@ def _read_header(zip: ZipFile, location: str) -> EvalLog:
     # first see if the header is here
     if HEADER_JSON in zip.namelist():
         with zip.open(HEADER_JSON, "r") as f:
-            log = EvalLog(**json.load(f))
+            log = EvalLog.model_validate(json.load(f), context=DESERIALIZING_CONTEXT)
             log.location = location
             return log
     else:
         with zip.open(_journal_path(START_JSON), "r") as f:
-            start = LogStart(**json.load(f))
+            start = LogStart.model_validate(json.load(f), context=DESERIALIZING_CONTEXT)
         return EvalLog(
             version=start.version, eval=start.eval, plan=start.plan, location=location
         )

inspect_ai/log/_recorders/json.py CHANGED Viewed

@@ -7,7 +7,7 @@ from pydantic import BaseModel
 from pydantic_core import from_json
 from typing_extensions import override
-from inspect_ai._util.constants import LOG_SCHEMA_VERSION
+from inspect_ai._util.constants import DESERIALIZING_CONTEXT, LOG_SCHEMA_VERSION
 from inspect_ai._util.error import EvalError
 from inspect_ai._util.file import absolute_file_path, file
 from inspect_ai._util.trace import trace_action
@@ -143,7 +143,7 @@ class JSONRecorder(FileRecorder):
         with file(location, "r") as f:
             # parse w/ pydantic
             raw_data = from_json(f.read())
-            log = EvalLog(**raw_data)
+            log = EvalLog.model_validate(raw_data, context=DESERIALIZING_CONTEXT)
             log.location = location
             # fail for unknown version
@@ -217,6 +217,11 @@ def _read_header_streaming(log_file: str) -> EvalLog:
         # Parse the log file, stopping before parsing samples
         status: Literal["started", "success", "cancelled", "error"] | None = None
+        eval: EvalSpec | None = None
+        plan: EvalPlan | None = None
+        results: EvalResults | None = None
+        stats: EvalStats | None = None
+        error: EvalError | None = None
         for k, v in ijson.kvitems(f, ""):
             if k == "status":
                 assert v in get_args(
@@ -239,6 +244,9 @@ def _read_header_streaming(log_file: str) -> EvalLog:
                 break
     assert status, "Must encounter a 'status'"
+    assert eval, "Must encounter a 'eval'"
+    assert plan, "Must encounter a 'plan'"
+    assert stats, "Must encounter a 'stats'"
     return EvalLog(
         eval=eval,

inspect_ai/log/_transcript.py CHANGED Viewed

@@ -146,7 +146,7 @@ class ModelEvent(BaseEvent):
     """working time for model call that succeeded (i.e. was not retried)."""
     @field_serializer("completed")
-    def serialize_completed(self, dt: datetime) -> str:
+    def serialize_completed(self, dt: datetime | None) -> str | None:
         if dt is None:
             return None
         return dt.astimezone().isoformat()
@@ -170,6 +170,9 @@ class ToolEvent(BaseEvent):
     arguments: dict[str, JsonValue]
     """Arguments to function."""
+    internal_name: str | None = Field(default=None)
+    """Internal name for tool (if any)."""
     view: ToolCallContent | None = Field(default=None)
     """Custom view of tool call input."""
@@ -235,7 +238,9 @@ class ToolEvent(BaseEvent):
     """Required so that we can include '_cancel_fn' as a member."""
     @field_serializer("completed")
-    def serialize_completed(self, dt: datetime) -> str:
+    def serialize_completed(self, dt: datetime | None) -> str | None:
+        if dt is None:
+            return None
         return dt.astimezone().isoformat()
@@ -270,7 +275,9 @@ class SandboxEvent(BaseEvent):
     """Time that sandbox action completed (see `timestamp` for started)"""
     @field_serializer("completed")
-    def serialize_completed(self, dt: datetime) -> str:
+    def serialize_completed(self, dt: datetime | None) -> str | None:
+        if dt is None:
+            return None
         return dt.astimezone().isoformat()
@@ -412,7 +419,9 @@ class SubtaskEvent(BaseEvent):
     """Working time for subtask (i.e. time not spent waiting on semaphores or model retries)."""
     @field_serializer("completed")
-    def serialize_completed(self, dt: datetime) -> str:
+    def serialize_completed(self, dt: datetime | None) -> str | None:
+        if dt is None:
+            return None
         return dt.astimezone().isoformat()

inspect_ai/model/_call_tools.py CHANGED Viewed

@@ -25,7 +25,6 @@ from typing import (
 if sys.version_info < (3, 11):
     from exceptiongroup import ExceptionGroup
 import anyio
 import yaml
 from anyio.streams.memory import MemoryObjectSendStream
@@ -168,6 +167,7 @@ async def call_tools(
                 id=call.id,
                 function=call.function,
                 arguments=call.arguments,
+                internal_name=call.internal_name,
                 result=content,
                 truncated=truncated,
                 view=call.view,
@@ -183,6 +183,7 @@ async def call_tools(
                             content=content,
                             tool_call_id=call.id,
                             function=call.function,
+                            internal_name=call.internal_name,
                             error=tool_error,
                         ),
                         event,
@@ -201,6 +202,7 @@ async def call_tools(
                 id=call.id,
                 function=call.function,
                 arguments=call.arguments,
+                internal_name=call.internal_name,
                 view=call.view,
                 pending=True,
             )
@@ -216,9 +218,7 @@ async def call_tools(
                     tg.start_soon(call_tool_task, call, send_stream)
                     event._set_cancel_fn(tg.cancel_scope.cancel)
                     async with receive_stream:
-                        async for result in receive_stream:
-                            tool_message, result_event = result
-                            break
+                        tool_message, result_event = await receive_stream.receive()
             except ExceptionGroup as ex:
                 raise ex.exceptions[0]
@@ -226,6 +226,7 @@ async def call_tools(
                 tool_message = ChatMessageTool(
                     content="",
                     function=call.function,
+                    internal_name=call.internal_name,
                     tool_call_id=call.id,
                     error=ToolCallError(
                         "timeout", "Command timed out before completing."
@@ -235,6 +236,7 @@ async def call_tools(
                     id=call.id,
                     function=call.function,
                     arguments=call.arguments,
+                    internal_name=call.internal_name,
                     result=tool_message.content,
                     truncated=None,
                     view=call.view,
@@ -508,6 +510,13 @@ def tool_parse_error_message(arguments: str, ex: Exception) -> str:
 def parse_tool_call(
     id: str, function: str, arguments: str, tools: list[ToolInfo] | None = None
 ) -> ToolCall:
+    """Parse a tool call from a JSON payload.
+    Note that this function doesn't know about internal tool names so the caller
+    should ammend the returned `ToolCall` by mapping the parsed `function` field from
+    from an internal name to an inspect tool name and fixing up the `ToolCall` object
+    as required to reflect this change.
+    """
     error: str | None = None
     arguments_dict: dict[str, Any] = {}

inspect_ai/model/_chat_message.py CHANGED Viewed

@@ -4,6 +4,7 @@ from typing import Any, Literal, Type, Union
 from pydantic import BaseModel, Field, model_validator
 from shortuuid import uuid
+from inspect_ai._util.constants import DESERIALIZING
 from inspect_ai._util.content import Content, ContentReasoning, ContentText
 from inspect_ai.tool import ToolCall
 from inspect_ai.tool._tool_call import ToolCallError
@@ -16,7 +17,7 @@ logger = getLogger(__name__)
 class ChatMessageBase(BaseModel):
     """Base class for chat messages."""
-    id: str = Field(default_factory=uuid)
+    id: str | None = Field(default=None)
     """Unique identifer for message."""
     content: str | list[Content]
@@ -25,6 +26,16 @@ class ChatMessageBase(BaseModel):
     source: Literal["input", "generate"] | None = Field(default=None)
     """Source of message."""
+    def model_post_init(self, __context: Any) -> None:
+        # check if deserializing
+        is_deserializing = isinstance(__context, dict) and __context.get(
+            DESERIALIZING, False
+        )
+        # Generate ID if needed and not deserializing
+        if self.id is None and not is_deserializing:
+            self.id = uuid()
     @property
     def text(self) -> str:
         """Get the text content of this message.
@@ -147,6 +158,9 @@ class ChatMessageTool(ChatMessageBase):
     function: str | None = Field(default=None)
     """Name of function called."""
+    internal_name: str | None = Field(default=None)
+    """Internal name for tool (if any)."""
     error: ToolCallError | None = Field(default=None)
     """Error which occurred during tool call."""

inspect_ai/model/_model.py CHANGED Viewed

@@ -33,6 +33,7 @@ from inspect_ai._util.content import (
 from inspect_ai._util.hooks import init_hooks, override_api_key, send_telemetry
 from inspect_ai._util.interrupt import check_sample_interrupt
 from inspect_ai._util.logger import warn_once
+from inspect_ai._util.notgiven import NOT_GIVEN, NotGiven
 from inspect_ai._util.platform import platform_init
 from inspect_ai._util.registry import (
     RegistryInfo,
@@ -77,7 +78,7 @@ class ModelAPI(abc.ABC):
     by the user. You can then pass these on to the approriate place in
     your model initialisation code (for example, here is what many
     of the built-in providers do with the `model_args` passed to them:
-    https://inspect.ai-safety-institute.org.uk/models.html#model-args)
+    https://inspect.aisi.org.uk/models.html#model-args)
     """
     def __init__(
@@ -232,15 +233,19 @@ class Model:
     config: GenerateConfig
     """Generation config."""
-    def __init__(self, api: ModelAPI, config: GenerateConfig) -> None:
+    def __init__(
+        self, api: ModelAPI, config: GenerateConfig, model_args: dict[str, Any] = {}
+    ) -> None:
         """Create a model.
         Args:
            api: Model API provider.
            config: Model configuration.
+           model_args: Optional model args
         """
         self.api = api
         self.config = config
+        self.model_args = model_args
         # state indicating whether our lifetime is bound by a context manager
         self._context_bound = False
@@ -449,6 +454,7 @@ class Model:
         async def generate() -> ModelOutput:
             check_sample_interrupt()
+            cache_entry: CacheEntry | None
             if cache:
                 if isinstance(cache, CachePolicy):
                     policy = cache
@@ -476,6 +482,8 @@ class Model:
                         call=None,
                     )
                     return existing
+            else:
+                cache_entry = None
             # verify that model apis are allowed
             self.verify_model_apis()
@@ -545,7 +553,7 @@ class Model:
                     json.dumps(dict(model=str(self), usage=output.usage.model_dump())),
                 )
-            if cache:
+            if cache and cache_entry:
                 cache_store(entry=cache_entry, output=output)
             return output
@@ -773,6 +781,10 @@ def get_model(
     if isinstance(model, Model):
         return model
+    # next see if this is the special "none" model
+    if model == "none":
+        model = "none/none"
     # now try finding an 'ambient' model (active or env var)
     if model is None:
         # return active_model if there is one
@@ -835,7 +847,7 @@ def get_model(
             config=config,
             **model_args,
         )
-        m = Model(modelapi_instance, config)
+        m = Model(modelapi_instance, config, model_args)
         if memoize:
             _models[model_cache_key] = m
         return m
@@ -860,17 +872,25 @@ def cached_model(key: str) -> Model | None:
 def resolve_models(
-    model: str | Model | list[str] | list[Model] | None,
+    model: str | Model | list[str] | list[Model] | None | NotGiven = NOT_GIVEN,
     model_base_url: str | None = None,
     model_args: dict[str, Any] = dict(),
     config: GenerateConfig = GenerateConfig(),
 ) -> list[Model]:
+    # resolve NotGiven to current INSPECT_EVAL_MODEL
+    if isinstance(model, NotGiven):
+        model = os.getenv("INSPECT_EVAL_MODEL", None)
+    # resolve None to NoModel
+    if model is None:
+        return [get_model("none")]
     # reflect back a plain model
     if isinstance(model, Model):
         return [model]
     # helper to resolve model of various types
-    def resolve_model(m: str | Model | None) -> Model:
+    def resolve_model(m: str | Model) -> Model:
         return get_model(
             model=m,
             base_url=model_base_url,
@@ -878,11 +898,8 @@ def resolve_models(
             **model_args,
         )
-    # resolve None and str to list
-    if model is None or isinstance(model, str):
-        model = model or os.getenv("INSPECT_EVAL_MODEL", None)
-        if model is None:
-            raise ValueError("No model specified (and no INSPECT_EVAL_MODEL defined)")
+    # str to list
+    if isinstance(model, str):
         model = [m.strip() for m in model.split(",")]
     # resolve models
@@ -1098,6 +1115,7 @@ def tool_result_images_reducer(
                     content=edited_tool_message_content,
                     tool_call_id=message.tool_call_id,
                     function=message.function,
+                    internal_name=message.internal_name,
                 )
             ],
             pending_content + new_user_message_content,
@@ -1236,7 +1254,7 @@ def active_model() -> Model | None:
 # shared contexts for asyncio tasks
-active_model_context_var: ContextVar[Model] = ContextVar("active_model")
+active_model_context_var: ContextVar[Model | None] = ContextVar("active_model")
 def handle_sample_message_limit(input: str | list[ChatMessage]) -> None:

inspect_ai/model/_model_output.py CHANGED Viewed

@@ -188,8 +188,10 @@ class ModelOutput(BaseModel):
         model: str,
         tool_name: str,
         tool_arguments: dict[str, Any],
+        internal_tool_name: str | None = None,
         tool_call_id: str | None = None,
         content: str | None = None,
+        type: str = "function",
     ) -> "ModelOutput":
         """
         Returns a ModelOutput for requesting a tool call.
@@ -197,6 +199,8 @@ class ModelOutput(BaseModel):
         Args:
             model: model name
             tool_name: The name of the tool.
+            internal_tool_name: The model's internal name for the tool (if any).
+            type: The model's type for the tool. e.g. "function", "computer_use_preview"
             tool_arguments: The arguments passed to the tool.
             tool_call_id: Optional ID for the tool call. Defaults to a random UUID.
             content: Optional content to include in the message. Defaults to "tool call for tool {tool_name}".
@@ -221,8 +225,9 @@ class ModelOutput(BaseModel):
                             ToolCall(
                                 id=tool_call_id,
                                 function=tool_name,
+                                internal_name=internal_tool_name,
                                 arguments=tool_arguments,
-                                type="function",
+                                type=type,
                             )
                         ],
                     ),

inspect-ai 0.3.74__py3-none-any.whl → 0.3.76__py3-none-any.whl

inspect-ai 0.3.74__py3-none-any.whl → 0.3.76__py3-none-any.whl