PyPI - inspect-ai - Versions diffs - 0.3.48__py3-none-any.whl → 0.3.50__py3-none-any.whl - Mend

inspect-ai 0.3.48__py3-none-any.whl → 0.3.50__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (97) hide show

inspect_ai/_cli/info.py +2 -2
inspect_ai/_cli/log.py +2 -2
inspect_ai/_cli/score.py +2 -2
inspect_ai/_display/core/display.py +19 -0
inspect_ai/_display/core/panel.py +37 -7
inspect_ai/_display/core/progress.py +29 -2
inspect_ai/_display/core/results.py +79 -40
inspect_ai/_display/core/textual.py +21 -0
inspect_ai/_display/rich/display.py +28 -8
inspect_ai/_display/textual/app.py +112 -3
inspect_ai/_display/textual/display.py +1 -1
inspect_ai/_display/textual/widgets/samples.py +132 -91
inspect_ai/_display/textual/widgets/task_detail.py +232 -0
inspect_ai/_display/textual/widgets/tasks.py +74 -6
inspect_ai/_display/textual/widgets/toggle.py +32 -0
inspect_ai/_eval/context.py +2 -0
inspect_ai/_eval/eval.py +4 -3
inspect_ai/_eval/loader.py +1 -1
inspect_ai/_eval/run.py +35 -2
inspect_ai/_eval/task/log.py +13 -11
inspect_ai/_eval/task/results.py +12 -3
inspect_ai/_eval/task/run.py +139 -36
inspect_ai/_eval/task/sandbox.py +2 -1
inspect_ai/_util/_async.py +30 -1
inspect_ai/_util/file.py +47 -5
inspect_ai/_util/html.py +3 -0
inspect_ai/_util/logger.py +6 -5
inspect_ai/_util/platform.py +5 -6
inspect_ai/_util/registry.py +1 -1
inspect_ai/_view/server.py +9 -9
inspect_ai/_view/www/App.css +2 -2
inspect_ai/_view/www/dist/assets/index.css +2 -2
inspect_ai/_view/www/dist/assets/index.js +395 -307
inspect_ai/_view/www/log-schema.json +13 -0
inspect_ai/_view/www/package.json +1 -0
inspect_ai/_view/www/src/components/MessageBand.mjs +1 -1
inspect_ai/_view/www/src/components/Tools.mjs +27 -16
inspect_ai/_view/www/src/samples/SampleDisplay.mjs +1 -3
inspect_ai/_view/www/src/samples/SampleScoreView.mjs +52 -77
inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +38 -13
inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +40 -18
inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +15 -2
inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +4 -2
inspect_ai/_view/www/src/types/log.d.ts +2 -0
inspect_ai/_view/www/src/utils/debugging.mjs +23 -0
inspect_ai/_view/www/src/workspace/WorkSpace.mjs +2 -0
inspect_ai/_view/www/yarn.lock +9 -4
inspect_ai/approval/__init__.py +1 -1
inspect_ai/approval/_human/approver.py +35 -0
inspect_ai/approval/_human/console.py +62 -0
inspect_ai/approval/_human/manager.py +108 -0
inspect_ai/approval/_human/panel.py +233 -0
inspect_ai/approval/_human/util.py +51 -0
inspect_ai/dataset/_sources/hf.py +2 -2
inspect_ai/dataset/_sources/util.py +1 -1
inspect_ai/log/_file.py +106 -36
inspect_ai/log/_recorders/eval.py +226 -158
inspect_ai/log/_recorders/file.py +9 -6
inspect_ai/log/_recorders/json.py +35 -12
inspect_ai/log/_recorders/recorder.py +15 -15
inspect_ai/log/_samples.py +52 -0
inspect_ai/model/_model.py +14 -0
inspect_ai/model/_model_output.py +4 -0
inspect_ai/model/_providers/azureai.py +1 -1
inspect_ai/model/_providers/hf.py +106 -4
inspect_ai/model/_providers/util/__init__.py +2 -0
inspect_ai/model/_providers/util/hf_handler.py +200 -0
inspect_ai/scorer/_common.py +1 -1
inspect_ai/solver/_plan.py +0 -8
inspect_ai/solver/_task_state.py +18 -1
inspect_ai/solver/_use_tools.py +9 -1
inspect_ai/tool/_tool_call.py +1 -1
inspect_ai/tool/_tool_def.py +2 -2
inspect_ai/tool/_tool_info.py +14 -2
inspect_ai/tool/_tool_params.py +2 -1
inspect_ai/tool/_tools/_execute.py +1 -1
inspect_ai/tool/_tools/_web_browser/_web_browser.py +6 -0
inspect_ai/util/__init__.py +5 -6
inspect_ai/util/_panel.py +91 -0
inspect_ai/util/_sandbox/__init__.py +2 -6
inspect_ai/util/_sandbox/context.py +4 -3
inspect_ai/util/_sandbox/docker/compose.py +12 -2
inspect_ai/util/_sandbox/docker/docker.py +19 -9
inspect_ai/util/_sandbox/docker/util.py +10 -2
inspect_ai/util/_sandbox/environment.py +47 -41
inspect_ai/util/_sandbox/local.py +15 -10
inspect_ai/util/_sandbox/self_check.py +6 -3
inspect_ai/util/_subprocess.py +43 -3
{inspect_ai-0.3.48.dist-info → inspect_ai-0.3.50.dist-info}/METADATA +2 -2
{inspect_ai-0.3.48.dist-info → inspect_ai-0.3.50.dist-info}/RECORD +94 -85
inspect_ai/_view/www/node_modules/flatted/python/flatted.py +0 -149
inspect_ai/_view/www/node_modules/flatted/python/test.py +0 -63
inspect_ai/approval/_human.py +0 -123
{inspect_ai-0.3.48.dist-info → inspect_ai-0.3.50.dist-info}/LICENSE +0 -0
{inspect_ai-0.3.48.dist-info → inspect_ai-0.3.50.dist-info}/WHEEL +0 -0
{inspect_ai-0.3.48.dist-info → inspect_ai-0.3.50.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.48.dist-info → inspect_ai-0.3.50.dist-info}/top_level.txt +0 -0

inspect_ai/log/_file.py CHANGED Viewed

@@ -1,17 +1,15 @@
-import asyncio
 import os
 import re
 from logging import getLogger
 from typing import Any, Callable, Generator, Literal, cast
-import fsspec  # type: ignore
-from fsspec.asyn import AsyncFileSystem  # type: ignore
-from fsspec.core import split_protocol  # type: ignore
 from pydantic_core import to_json
+from inspect_ai._util._async import run_coroutine
 from inspect_ai._util.constants import ALL_LOG_FORMATS, EVAL_LOG_FORMAT
 from inspect_ai._util.file import (
     FileInfo,
+    async_fileystem,
     file,
     filesystem,
 )
@@ -110,25 +108,25 @@ async def list_eval_logs_async(
     # async filesystem if we can
     fs = filesystem(log_dir, fs_options)
     if fs.is_async():
-        async_fs = async_fileystem(log_dir, fs_options=fs_options)
-        if await async_fs._exists(log_dir):
-            # prevent caching of listings
-            async_fs.invalidate_cache(log_dir)
-            # list logs
-            if recursive:
-                files: list[dict[str, Any]] = []
-                async for _, _, filenames in async_fs._walk(log_dir, detail=True):
-                    files.extend(filenames.values())
+        async with async_fileystem(log_dir, fs_options=fs_options) as async_fs:
+            if await async_fs._exists(log_dir):
+                # prevent caching of listings
+                async_fs.invalidate_cache(log_dir)
+                # list logs
+                if recursive:
+                    files: list[dict[str, Any]] = []
+                    async for _, _, filenames in async_fs._walk(log_dir, detail=True):
+                        files.extend(filenames.values())
+                else:
+                    files = cast(
+                        list[dict[str, Any]],
+                        await async_fs._ls(log_dir, detail=True),
+                    )
+                logs = [fs._file_info(file) for file in files]
+                # resolve to eval logs
+                return log_files_from_ls(logs, formats, descending)
             else:
-                files = cast(
-                    list[dict[str, Any]],
-                    async_fs._ls(log_dir, detail=True),
-                )
-            logs = [fs._file_info(file) for file in files]
-            # resolve to eval logs
-            return log_files_from_ls(logs, formats, descending)
-        else:
-            return []
+                return []
     else:
         return list_eval_logs(
             log_dir=log_dir,
@@ -146,6 +144,22 @@ def write_eval_log(
 ) -> None:
     """Write an evaluation log.
+    Args:
+       log (EvalLog): Evaluation log to write.
+       location (str | FileInfo): Location to write log to.
+       format (Literal["eval", "json", "auto"]): Write to format
+          (defaults to 'auto' based on `log_file` extension)
+    """
+    run_coroutine(write_eval_log_async(log, location, format))
+async def write_eval_log_async(
+    log: EvalLog,
+    location: str | FileInfo | None = None,
+    format: Literal["eval", "json", "auto"] = "auto",
+) -> None:
+    """Write an evaluation log.
     Args:
        log (EvalLog): Evaluation log to write.
        location (str | FileInfo): Location to write log to.
@@ -169,7 +183,7 @@ def write_eval_log(
         recorder_type = recorder_type_for_location(location)
     else:
         recorder_type = recorder_type_for_format(format)
-    recorder_type.write_log(location, log)
+    await recorder_type.write_log(location, log)
     logger.debug(f"Writing eval log to {location} completed")
@@ -224,6 +238,31 @@ def read_eval_log(
 ) -> EvalLog:
     """Read an evaluation log.
+    Args:
+       log_file (str | FileInfo): Log file to read.
+       header_only (bool): Read only the header (i.e. exclude
+         the "samples" and "logging" fields). Defaults to False.
+       resolve_attachments (bool): Resolve attachments (e.g. images)
+          to their full content.
+       format (Literal["eval", "json", "auto"]): Read from format
+          (defaults to 'auto' based on `log_file` extension)
+    Returns:
+       EvalLog object read from file.
+    """
+    return run_coroutine(
+        read_eval_log_async(log_file, header_only, resolve_attachments, format)
+    )
+async def read_eval_log_async(
+    log_file: str | FileInfo,
+    header_only: bool = False,
+    resolve_attachments: bool = False,
+    format: Literal["eval", "json", "auto"] = "auto",
+) -> EvalLog:
+    """Read an evaluation log.
     Args:
        log_file (str | FileInfo): Log file to read.
        header_only (bool): Read only the header (i.e. exclude
@@ -245,7 +284,7 @@ def read_eval_log(
         recorder_type = recorder_type_for_location(log_file)
     else:
         recorder_type = recorder_type_for_format(format)
-    log = recorder_type.read_log(log_file, header_only)
+    log = await recorder_type.read_log(log_file, header_only)
     # resolve attachement if requested
     if resolve_attachments and log.samples:
@@ -267,7 +306,15 @@ def read_eval_log(
 def read_eval_log_headers(
     log_files: list[str] | list[FileInfo] | list[EvalLogInfo],
 ) -> list[EvalLog]:
-    return [read_eval_log(log_file, header_only=True) for log_file in log_files]
+    return run_coroutine(read_eval_log_headers_async(log_files))
+async def read_eval_log_headers_async(
+    log_files: list[str] | list[FileInfo] | list[EvalLogInfo],
+) -> list[EvalLog]:
+    return [
+        await read_eval_log_async(log_file, header_only=True) for log_file in log_files
+    ]
 def read_eval_log_sample(
@@ -279,6 +326,35 @@ def read_eval_log_sample(
 ) -> EvalSample:
     """Read a sample from an evaluation log.
+    Args:
+       log_file (str | FileInfo): Log file to read.
+       id (int | str): Sample id to read.
+       epoch (int): Epoch for sample id (defaults to 1)
+       resolve_attachments (bool): Resolve attachments (e.g. images)
+          to their full content.
+       format (Literal["eval", "json", "auto"]): Read from format
+          (defaults to 'auto' based on `log_file` extension)
+    Returns:
+       EvalSample object read from file.
+    Raises:
+       IndexError: If the passed id and epoch are not found.
+    """
+    return run_coroutine(
+        read_eval_log_sample_async(log_file, id, epoch, resolve_attachments, format)
+    )
+async def read_eval_log_sample_async(
+    log_file: str | FileInfo,
+    id: int | str,
+    epoch: int = 1,
+    resolve_attachments: bool = False,
+    format: Literal["eval", "json", "auto"] = "auto",
+) -> EvalSample:
+    """Read a sample from an evaluation log.
     Args:
        log_file (str | FileInfo): Log file to read.
        id (int | str): Sample id to read.
@@ -301,7 +377,7 @@ def read_eval_log_sample(
         recorder_type = recorder_type_for_location(log_file)
     else:
         recorder_type = recorder_type_for_format(format)
-    sample = recorder_type.read_log_sample(log_file, id, epoch)
+    sample = await recorder_type.read_log_sample(log_file, id, epoch)
     if resolve_attachments:
         sample = resolve_sample_attachments(sample)
@@ -442,7 +518,7 @@ def log_file_info(info: FileInfo) -> "EvalLogInfo":
     )
-def eval_log_json(log: EvalLog) -> str:
+def eval_log_json(log: EvalLog) -> bytes:
     # serialize to json (ignore values that are unserializable)
     # these values often result from solvers using metadata to
     # pass around 'live' objects -- this is fine to do and we
@@ -452,14 +528,8 @@ def eval_log_json(log: EvalLog) -> str:
         indent=2,
         exclude_none=True,
         fallback=lambda _x: None,
-    ).decode()
+    )
-def async_fileystem(log_file: str, fs_options: dict[str, Any] = {}) -> AsyncFileSystem:
-    # determine protocol
-    protocol, _ = split_protocol(log_file)
-    protocol = protocol or "file"
-    # create filesystem
-    fs_options = fs_options.copy()
-    fs_options.update({"asynchronous": True, "loop": asyncio.get_event_loop()})
-    return fsspec.filesystem(protocol, **fs_options)
+def eval_log_json_str(log: EvalLog) -> str:
+    return eval_log_json(log).decode()

inspect-ai 0.3.48__py3-none-any.whl → 0.3.50__py3-none-any.whl

inspect-ai 0.3.48__py3-none-any.whl → 0.3.50__py3-none-any.whl