inspect-ai 0.3.49__py3-none-any.whl → 0.3.51__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- inspect_ai/_cli/info.py +2 -2
- inspect_ai/_cli/log.py +2 -2
- inspect_ai/_cli/score.py +2 -2
- inspect_ai/_display/core/display.py +19 -0
- inspect_ai/_display/core/panel.py +37 -7
- inspect_ai/_display/core/progress.py +29 -2
- inspect_ai/_display/core/results.py +79 -40
- inspect_ai/_display/core/textual.py +21 -0
- inspect_ai/_display/rich/display.py +28 -8
- inspect_ai/_display/textual/app.py +107 -1
- inspect_ai/_display/textual/display.py +1 -1
- inspect_ai/_display/textual/widgets/samples.py +132 -91
- inspect_ai/_display/textual/widgets/task_detail.py +236 -0
- inspect_ai/_display/textual/widgets/tasks.py +74 -6
- inspect_ai/_display/textual/widgets/toggle.py +32 -0
- inspect_ai/_eval/context.py +2 -0
- inspect_ai/_eval/eval.py +4 -3
- inspect_ai/_eval/loader.py +1 -1
- inspect_ai/_eval/run.py +35 -2
- inspect_ai/_eval/task/log.py +13 -11
- inspect_ai/_eval/task/results.py +12 -3
- inspect_ai/_eval/task/run.py +139 -36
- inspect_ai/_eval/task/sandbox.py +2 -1
- inspect_ai/_util/_async.py +30 -1
- inspect_ai/_util/file.py +31 -4
- inspect_ai/_util/html.py +3 -0
- inspect_ai/_util/logger.py +6 -5
- inspect_ai/_util/platform.py +5 -6
- inspect_ai/_util/registry.py +1 -1
- inspect_ai/_view/server.py +9 -9
- inspect_ai/_view/www/App.css +2 -2
- inspect_ai/_view/www/dist/assets/index.css +2 -2
- inspect_ai/_view/www/dist/assets/index.js +352 -294
- inspect_ai/_view/www/log-schema.json +13 -0
- inspect_ai/_view/www/package.json +1 -0
- inspect_ai/_view/www/src/components/MessageBand.mjs +1 -1
- inspect_ai/_view/www/src/components/Tools.mjs +16 -13
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +1 -3
- inspect_ai/_view/www/src/samples/SampleScoreView.mjs +52 -77
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +38 -13
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +15 -2
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +4 -2
- inspect_ai/_view/www/src/types/log.d.ts +2 -0
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +2 -0
- inspect_ai/_view/www/yarn.lock +9 -4
- inspect_ai/approval/__init__.py +1 -1
- inspect_ai/approval/_human/approver.py +35 -0
- inspect_ai/approval/_human/console.py +62 -0
- inspect_ai/approval/_human/manager.py +108 -0
- inspect_ai/approval/_human/panel.py +233 -0
- inspect_ai/approval/_human/util.py +51 -0
- inspect_ai/dataset/_sources/hf.py +2 -2
- inspect_ai/dataset/_sources/util.py +1 -1
- inspect_ai/log/_file.py +106 -36
- inspect_ai/log/_recorders/eval.py +226 -158
- inspect_ai/log/_recorders/file.py +9 -6
- inspect_ai/log/_recorders/json.py +35 -12
- inspect_ai/log/_recorders/recorder.py +15 -15
- inspect_ai/log/_samples.py +52 -0
- inspect_ai/model/_model.py +14 -0
- inspect_ai/model/_model_output.py +4 -0
- inspect_ai/model/_providers/azureai.py +1 -1
- inspect_ai/model/_providers/hf.py +106 -4
- inspect_ai/model/_providers/util/__init__.py +2 -0
- inspect_ai/model/_providers/util/hf_handler.py +200 -0
- inspect_ai/scorer/_common.py +1 -1
- inspect_ai/solver/_plan.py +0 -8
- inspect_ai/solver/_task_state.py +18 -1
- inspect_ai/solver/_use_tools.py +9 -1
- inspect_ai/tool/_tool_def.py +2 -2
- inspect_ai/tool/_tool_info.py +14 -2
- inspect_ai/tool/_tool_params.py +2 -1
- inspect_ai/tool/_tools/_execute.py +1 -1
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +6 -0
- inspect_ai/util/__init__.py +5 -6
- inspect_ai/util/_panel.py +91 -0
- inspect_ai/util/_sandbox/__init__.py +2 -6
- inspect_ai/util/_sandbox/context.py +4 -3
- inspect_ai/util/_sandbox/docker/compose.py +12 -2
- inspect_ai/util/_sandbox/docker/docker.py +19 -9
- inspect_ai/util/_sandbox/docker/util.py +10 -2
- inspect_ai/util/_sandbox/environment.py +47 -41
- inspect_ai/util/_sandbox/local.py +15 -10
- inspect_ai/util/_subprocess.py +43 -3
- {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/RECORD +90 -82
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +0 -149
- inspect_ai/_view/www/node_modules/flatted/python/test.py +0 -63
- inspect_ai/approval/_human.py +0 -123
- {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/top_level.txt +0 -0
inspect_ai/_eval/task/run.py
CHANGED
@@ -1,6 +1,7 @@
 import asyncio
 import contextlib
 import sys
+import time
 from copy import deepcopy
 from dataclasses import dataclass, field
 from logging import getLogger
@@ -16,6 +17,7 @@ from inspect_ai._display import (
     TaskSuccess,
     display,
 )
+from inspect_ai._display.core.display import TaskDisplay, TaskDisplayMetric
 from inspect_ai._util.constants import (
     DEFAULT_EPOCHS,
     DEFAULT_MAX_CONNECTIONS,
@@ -40,7 +42,7 @@ from inspect_ai.log import (
     EvalStats,
 )
 from inspect_ai.log._condense import condense_sample
-from inspect_ai.log._file import
+from inspect_ai.log._file import eval_log_json_str
 from inspect_ai.log._log import EvalSampleLimit, EvalSampleReductions, eval_error
 from inspect_ai.log._samples import active_sample
 from inspect_ai.log._transcript import (
@@ -60,7 +62,8 @@ from inspect_ai.model import (
 )
 from inspect_ai.model._model import init_sample_model_usage, sample_model_usage
 from inspect_ai.scorer import Scorer, Target
-from inspect_ai.scorer._metric import SampleScore, Score
+from inspect_ai.scorer._metric import Metric, SampleScore, Score
+from inspect_ai.scorer._reducer.types import ScoreReducer
 from inspect_ai.scorer._score import init_scoring_context
 from inspect_ai.scorer._scorer import unique_scorer_name
 from inspect_ai.solver import Generate, Plan, TaskState
@@ -92,6 +95,12 @@ py_logger = getLogger(__name__)

 EvalSampleSource = Callable[[int | str, int], EvalSample | None]

+# Units allocated for sample progress - the total units
+# represents the total units of progress for an individual sample
+# the remainder are increments of progress within a sample (and
+# must sum to the total_progress_units when the sample is complete)
+SAMPLE_TOTAL_PROGRESS_UNITS = 1
+

 @dataclass
 class TaskRunOptions:
@@ -135,8 +144,6 @@ async def task_run(options: TaskRunOptions) -> EvalLog:
     results: EvalResults | None = None
     reductions: list[EvalSampleReductions] | None = None
     stats = EvalStats(started_at=iso_now())
-    error: EvalError | None = None
-    cancelled = False

     # handle sample errors (raise as required)
     sample_error_handler = SampleErrorHandler(
@@ -183,11 +190,6 @@ async def task_run(options: TaskRunOptions) -> EvalLog:
         else ["(none)"]
     )

-    # compute steps (steps = samples * steps in plan + 1 for scorer)
-    steps = len(samples) * (
-        len(plan.steps) + (1 if plan.finish else 0) + (1)  # scorer
-    )
-
     # compute an eval directory relative log location if we can
     if PurePath(logger.location).is_relative_to(PurePath(eval_wd)):
         log_location = PurePath(logger.location).relative_to(eval_wd).as_posix()
@@ -202,7 +204,7 @@ async def task_run(options: TaskRunOptions) -> EvalLog:
         dataset=task.dataset.name or "(samples)",
         scorer=", ".join(scorer_profiles),
         samples=len(samples),
-        steps=
+        steps=len(samples) * SAMPLE_TOTAL_PROGRESS_UNITS,
         eval_config=config,
         task_args=logger.eval.task_args,
         generate_config=generate_config,
@@ -213,12 +215,12 @@ async def task_run(options: TaskRunOptions) -> EvalLog:
     with display().task(profile) as td:
         try:
             # start the log
-            log_start(logger, plan, generate_config)
+            await log_start(logger, plan, generate_config)

             with td.progress() as p:
                 # forward progress
-                def progress() -> None:
-                    p.update(
+                def progress(number: int) -> None:
+                    p.update(number)

                 # provide solvers a function that they can use to generate output
                 async def generate(
@@ -243,6 +245,28 @@ async def task_run(options: TaskRunOptions) -> EvalLog:
                     config, generate_config, model.api
                 )

+                # track when samples complete and update progress as we go
+                progress_results: list[dict[str, SampleScore]] = []
+                update_metrics_display = update_metrics_display_fn(td)
+
+                def sample_complete(sample_score: dict[str, SampleScore]) -> None:
+                    # Capture the result
+                    progress_results.append(sample_score)
+
+                    # Increment the segment progress
+                    td.sample_complete(
+                        complete=len(progress_results), total=len(samples)
+                    )
+
+                    # Update metrics
+                    update_metrics_display(
+                        len(progress_results),
+                        progress_results,
+                        scorers,
+                        task.epochs_reducer,
+                        task.metrics,
+                    )
+
                 # create sample coroutines
                 sample_coroutines = [
                     task_run_sample(
@@ -259,6 +283,7 @@ async def task_run(options: TaskRunOptions) -> EvalLog:
                         log_images=log_images,
                         sample_source=sample_source,
                         sample_error=sample_error_handler,
+                        sample_complete=sample_complete,
                         fails_on_error=(
                             config.fail_on_error is None
                             or config.fail_on_error is True
@@ -269,7 +294,18 @@ async def task_run(options: TaskRunOptions) -> EvalLog:
                     for (sample, state) in zip(samples, states)
                 ]

-                #
+                # initial progress
+                td.sample_complete(complete=0, total=len(samples))
+
+                # Update metrics to empty state
+                update_metrics_display(
+                    len(progress_results),
+                    progress_results,
+                    scorers,
+                    task.epochs_reducer,
+                    task.metrics,
+                )
+
                 sample_results = await asyncio.gather(*sample_coroutines)

             # compute and record metrics if we have scores
@@ -291,6 +327,11 @@ async def task_run(options: TaskRunOptions) -> EvalLog:
             # collect eval data
             collect_eval_data(stats)

+            # finish w/ success status
+            eval_log = await logger.log_finish(
+                "success", stats, results, reductions
+            )
+
             # display task summary
             td.complete(
                 TaskSuccess(
@@ -301,12 +342,14 @@ async def task_run(options: TaskRunOptions) -> EvalLog:
             )

         except asyncio.CancelledError:
-            # flag as cancelled
-            cancelled = True
-
             # collect eval data
             collect_eval_data(stats)

+            # finish w/ cancelled status
+            eval_log = await logger.log_finish(
+                "cancelled", stats, results, reductions
+            )
+
             # display task cancelled
             td.complete(TaskCancelled(logger.samples_completed, stats))

@@ -325,25 +368,22 @@ async def task_run(options: TaskRunOptions) -> EvalLog:
             # collect eval data
             collect_eval_data(stats)

+            # finish with error status
+            eval_log = await logger.log_finish(
+                "error", stats, results, reductions, error
+            )
+
             # display it
             td.complete(
                 TaskError(logger.samples_completed, type, value, traceback)
             )

-    # log as appropriate
-    if cancelled:
-        eval_log = logger.log_finish("cancelled", stats, results, reductions)
-    elif error:
-        eval_log = logger.log_finish("error", stats, results, reductions, error)
-    else:
-        eval_log = logger.log_finish("success", stats, results, reductions)
-
     # notify the view module that an eval just completed
     # (in case we have a view polling for new evals)
     view_notify_eval(logger.location)

     try:
-        await send_telemetry("eval_log",
+        await send_telemetry("eval_log", eval_log_json_str(eval_log))
     except Exception as ex:
         py_logger.warning(
             f"Error occurred sending telemetry: {exception_message(ex)}"
@@ -353,6 +393,63 @@ async def task_run(options: TaskRunOptions) -> EvalLog:
     return eval_log


+def update_metrics_display_fn(
+    td: TaskDisplay, initial_interval: float = 0, min_interval: float = 0.9
+) -> Callable[
+    [
+        int,
+        list[dict[str, SampleScore]],
+        list[Scorer] | None,
+        ScoreReducer | list[ScoreReducer] | None,
+        list[Metric] | dict[str, list[Metric]] | None,
+    ],
+    None,
+]:
+    next_compute_time = time.perf_counter() + initial_interval
+
+    def compute(
+        sample_count: int,
+        sample_scores: list[dict[str, SampleScore]],
+        scorers: list[Scorer] | None,
+        reducers: ScoreReducer | list[ScoreReducer] | None,
+        metrics: list[Metric] | dict[str, list[Metric]] | None,
+    ) -> None:
+        nonlocal next_compute_time
+        time_start = time.perf_counter()
+        if time_start >= next_compute_time:
+            # compute metrics
+            results, reductions = eval_results(
+                samples=sample_count,
+                scores=sample_scores,
+                reducers=reducers,
+                scorers=scorers,
+                metrics=metrics,
+            )
+
+            # Name, reducer, value
+            task_metrics = []
+            if len(results.scores) > 0:
+                for score in results.scores:
+                    for key, metric in score.metrics.items():
+                        task_metrics.append(
+                            TaskDisplayMetric(
+                                scorer=score.name,
+                                name=metric.name,
+                                value=metric.value,
+                                reducer=score.reducer,
+                            )
+                        )
+                td.update_metrics(task_metrics)
+
+            # determine how long to wait before recomputing metrics
+            time_end = time.perf_counter()
+            elapsed_time = time_end - time_start
+            wait = max(min_interval, elapsed_time * 10)
+            next_compute_time = time_end + wait
+
+    return compute
+
+
 async def task_run_sample(
     task_name: str,
     sample: Sample,
@@ -362,11 +459,12 @@ async def task_run_sample(
     plan: Plan,
     scorers: list[Scorer] | None,
     generate: Generate,
-    progress: Callable[
+    progress: Callable[[int], None],
     logger: TaskLogger | None,
     log_images: bool,
     sample_source: EvalSampleSource | None,
     sample_error: Callable[[BaseException], EvalError],
+    sample_complete: Callable[[dict[str, SampleScore]], None],
     fails_on_error: bool,
     time_limit: int | None,
     semaphore: asyncio.Semaphore | None,
@@ -375,12 +473,12 @@ async def task_run_sample(
     if sample_source and sample.id is not None:
         previous_sample = sample_source(sample.id, state.epoch)
         if previous_sample:
-            # tick off progress
-
-
+            # tick off progress for this sample
+            progress(SAMPLE_TOTAL_PROGRESS_UNITS)
+
             # log if requested
             if logger:
-                logger.log_sample(previous_sample, flush=False)
+                await logger.log_sample(previous_sample, flush=False)

             # return score
             if previous_sample.scores:
@@ -436,6 +534,9 @@ async def task_run_sample(
             model=str(state.model),
             sample=sample,
             epoch=state.epoch,
+            message_limit=state.message_limit,
+            token_limit=state.token_limit,
+            time_limit=time_limit,
             fails_on_error=fails_on_error,
             transcript=sample_transcript,
         ) as active,
@@ -454,7 +555,6 @@ async def task_run_sample(
             )

             # set progress for plan then run it
-            plan.progress = progress
             state = await plan(state, generate)

         except TimeoutError:
@@ -562,7 +662,8 @@ async def task_run_sample(
             # handle error (this will throw if we've exceeded the limit)
             error = handle_error(ex)

-
+        # complete the sample
+        progress(SAMPLE_TOTAL_PROGRESS_UNITS)

         # log it
         if logger is not None:
@@ -576,7 +677,7 @@ async def task_run_sample(
             state = state_without_base64_images(state)

         # log the sample
-        log_sample(
+        await log_sample(
             logger=logger,
             sample=sample,
             state=state,
@@ -587,12 +688,14 @@ async def task_run_sample(

     # return
     if error is None:
+        if results is not None:
+            sample_complete(results)
         return results
     else:
         return None


-def log_sample(
+async def log_sample(
     logger: TaskLogger,
     sample: Sample,
     state: TaskState,
@@ -638,7 +741,7 @@ def log_sample(
         limit=limit,
     )

-    logger.log_sample(condense_sample(eval_sample, log_images), flush=True)
+    await logger.log_sample(condense_sample(eval_sample, log_images), flush=True)


 async def resolve_dataset(
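Note: the new update_metrics_display_fn above throttles how often interim metrics are recomputed while samples stream in, waiting at least min_interval seconds and backing off to ten times the cost of the last computation. The following is a minimal standalone sketch of that throttling pattern only; the throttled helper, the compute_fn parameter, and the running-mean example are illustrative and not part of inspect_ai.

    import time
    from typing import Callable, TypeVar

    T = TypeVar("T")


    def throttled(compute_fn: Callable[[], T], min_interval: float = 0.9) -> Callable[[], T | None]:
        """Wrap compute_fn so it runs at most once per interval.

        The wait before the next run is the larger of min_interval and
        10x the time the last computation took, so slow computations
        automatically back off (mirrors update_metrics_display_fn).
        """
        next_time = time.perf_counter()

        def maybe_compute() -> T | None:
            nonlocal next_time
            start = time.perf_counter()
            if start < next_time:
                return None  # too soon, skip this update
            result = compute_fn()
            end = time.perf_counter()
            next_time = end + max(min_interval, (end - start) * 10)
            return result

        return maybe_compute


    # hypothetical usage: recompute a running mean as results stream in
    scores: list[float] = []
    update = throttled(lambda: sum(scores) / len(scores) if scores else 0.0)

    for value in (0.2, 0.8, 0.5, 1.0):
        scores.append(value)
        mean = update()
        if mean is not None:  # only the first of these fast iterations prints
            print(f"interim mean: {mean:.2f}")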
inspect_ai/_eval/task/sandbox.py
CHANGED
@@ -15,6 +15,7 @@ from inspect_ai.util._sandbox.context import (
 )
 from inspect_ai.util._sandbox.environment import (
     SandboxEnvironment,
+    SandboxEnvironmentConfigType,
     SandboxEnvironmentSpec,
 )

@@ -129,7 +130,7 @@ def resolve_sandbox(
         and sample.sandbox.type == task_sandbox.type
         and sample.sandbox.config is not None
     ):
-        sandbox_config:
+        sandbox_config: SandboxEnvironmentConfigType | None = sample.sandbox.config
     else:
         sandbox_config = task_sandbox.config
     return SandboxEnvironmentSpec(task_sandbox.type, sandbox_config)
inspect_ai/_util/_async.py
CHANGED
@@ -1,5 +1,7 @@
 import asyncio
-from typing import Any
+from typing import Any, Coroutine, TypeVar
+
+import nest_asyncio  # type: ignore


 def is_callable_coroutine(func_or_cls: Any) -> bool:
@@ -8,3 +10,30 @@ def is_callable_coroutine(func_or_cls: Any) -> bool:
     elif callable(func_or_cls):
         return asyncio.iscoroutinefunction(func_or_cls.__call__)
     return False
+
+
+T = TypeVar("T")
+
+
+_initialised_nest_asyncio: bool = False
+
+
+def init_nest_asyncio() -> None:
+    global _initialised_nest_asyncio
+    if not _initialised_nest_asyncio:
+        nest_asyncio.apply()
+        _initialised_nest_asyncio = True
+
+
+def run_coroutine(coroutine: Coroutine[None, None, T]) -> T:
+    try:
+        # this will throw if there is no running loop
+        asyncio.get_running_loop()
+
+        # initialiase nest_asyncio then we are clear to run
+        init_nest_asyncio()
+        return asyncio.run(coroutine)
+
+    except RuntimeError:
+        # No running event loop so we are clear to run
+        return asyncio.run(coroutine)
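Note: run_coroutine lets synchronous call sites execute a coroutine whether or not an event loop is already running (for example inside Jupyter), by applying nest_asyncio only when a running loop is detected. A minimal sketch of the same pattern is below; the run_sync name and the fetch_value coroutine are illustrative, not part of the package.

    import asyncio
    from typing import Coroutine, TypeVar

    import nest_asyncio  # allows asyncio.run() inside an already running loop

    T = TypeVar("T")


    def run_sync(coroutine: Coroutine[None, None, T]) -> T:
        try:
            # raises RuntimeError if no loop is currently running
            asyncio.get_running_loop()
            # a loop is running (notebook/REPL case): patch it so
            # asyncio.run() can re-enter it, then run to completion
            nest_asyncio.apply()
            return asyncio.run(coroutine)
        except RuntimeError:
            # no running loop: plain asyncio.run() is safe
            return asyncio.run(coroutine)


    async def fetch_value() -> int:  # illustrative coroutine
        await asyncio.sleep(0.01)
        return 42


    if __name__ == "__main__":
        print(run_sync(fetch_value()))  # works from plain synchronous code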
inspect_ai/_util/file.py
CHANGED
@@ -1,3 +1,5 @@
+import asyncio
+import contextlib
 import datetime
 import io
 import os
@@ -7,11 +9,12 @@ import unicodedata
 from contextlib import contextmanager
 from copy import deepcopy
 from pathlib import Path
-from typing import Any, BinaryIO, Iterator, Literal, cast, overload
+from typing import Any, AsyncIterator, BinaryIO, Iterator, Literal, cast, overload
 from urllib.parse import urlparse

-import fsspec  # type: ignore
-from fsspec.
+import fsspec  # type: ignore  # type: ignore
+from fsspec.asyn import AsyncFileSystem  # type: ignore
+from fsspec.core import split_protocol  # type: ignore  # type: ignore
 from fsspec.implementations.local import make_path_posix  # type: ignore
 from pydantic import BaseModel
 from s3fs import S3FileSystem  # type: ignore
@@ -277,10 +280,34 @@ def filesystem(path: str, fs_options: dict[str, Any] = {}) -> FileSystem:
     options.update(fs_options)

     # create filesystem
-    fs, path = fsspec.core.url_to_fs(path)
+    fs, path = fsspec.core.url_to_fs(path, **options)
     return FileSystem(fs)


+@contextlib.asynccontextmanager
+async def async_fileystem(
+    location: str, fs_options: dict[str, Any] = {}
+) -> AsyncIterator[AsyncFileSystem]:
+    # determine protocol
+    protocol, _ = split_protocol(location)
+    protocol = protocol or "file"
+
+    # build options
+    options = default_fs_options(location)
+    options.update(fs_options)
+
+    if protocol == "s3":
+        s3 = S3FileSystem(asynchronous=True, **options)
+        session = await s3.set_session()
+        try:
+            yield s3
+        finally:
+            await session.close()
+    else:
+        options.update({"asynchronous": True, "loop": asyncio.get_event_loop()})
+        yield fsspec.filesystem(protocol, **options)
+
+
 def absolute_file_path(file: str) -> str:
     # check for a relative dir, if we find one then resolve to absolute
     fs_scheme = urlparse(file).scheme
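Note: the new async_fileystem context manager (spelled that way in the source) yields an fsspec AsyncFileSystem, opening and closing an aiobotocore session for S3 URLs. A rough usage sketch follows, assuming fsspec's convention that coroutine methods carry a leading underscore (for example _ls); the bucket name is hypothetical, the helper is a private module that may change, and async methods are only available on async-capable backends such as S3.

    import asyncio

    # private helper added in this release; import path may change
    from inspect_ai._util.file import async_fileystem


    async def list_logs() -> None:
        # hypothetical S3 location; any fsspec URL should work here
        async with async_fileystem("s3://my-eval-logs") as fs:
            # fsspec async filesystems expose coroutine methods with a
            # leading underscore (e.g. _ls, _cat_file)
            entries = await fs._ls("my-eval-logs", detail=False)
            for entry in entries:
                print(entry)


    if __name__ == "__main__":
        asyncio.run(list_logs())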
inspect_ai/_util/html.py
ADDED
inspect_ai/_util/logger.py
CHANGED
@@ -1,5 +1,4 @@
 import os
-from contextvars import ContextVar
 from logging import (
     INFO,
     WARNING,
@@ -154,19 +153,21 @@ def notify_logger_record(record: LogRecord, write: bool) -> None:

     if write:
         transcript()._event(LoggerEvent(message=LoggingMessage.from_log_record(record)))
+    global _rate_limit_count
     if record.levelno <= INFO and "429" in record.getMessage():
-
+        _rate_limit_count = _rate_limit_count + 1


-
+_rate_limit_count = 0


 def init_http_rate_limit_count() -> None:
-
+    global _rate_limit_count
+    _rate_limit_count = 0


 def http_rate_limit_count() -> int:
-    return
+    return _rate_limit_count


 def warn_once(logger: Logger, message: str) -> None:
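Note: the logger change above swaps a ContextVar for a module-level counter that is bumped whenever a low-severity record mentions an HTTP 429. A self-contained sketch of the same counting idea using a standard logging.Filter; the logger name and message are made up.

    import logging

    _rate_limit_count = 0


    class RateLimitCounter(logging.Filter):
        """Count records that mention an HTTP 429 without suppressing them."""

        def filter(self, record: logging.LogRecord) -> bool:
            global _rate_limit_count
            if record.levelno <= logging.INFO and "429" in record.getMessage():
                _rate_limit_count += 1
            return True  # never drop the record


    logger = logging.getLogger("example.http")
    logger.setLevel(logging.INFO)
    logger.addHandler(logging.StreamHandler())
    logger.addFilter(RateLimitCounter())

    logger.info("HTTP/1.1 429 Too Many Requests - retrying")
    print(f"rate limited {_rate_limit_count} time(s)")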
inspect_ai/_util/platform.py
CHANGED
@@ -1,6 +1,8 @@
 import importlib.util
 import os

+from inspect_ai._util._async import init_nest_asyncio
+
 from .error import set_exception_hook


@@ -21,7 +23,7 @@ def platform_init() -> None:
     # set exception hook if we haven't already
     set_exception_hook()

-    # if we are running in a notebook
+    # if we are running in a notebook...
     if running_in_notebook():
         # check for required packages
         if not have_package("ipywidgets"):
@@ -30,11 +32,8 @@ def platform_init() -> None:
                 + "pip install ipywidgets\n"
             )

-        #
-
-        import nest_asyncio  # type: ignore
-
-        nest_asyncio.apply()
+        # setup nested asyncio
+        init_nest_asyncio()


 def have_package(package: str) -> bool:
inspect_ai/_util/registry.py
CHANGED
@@ -84,7 +84,7 @@ def registry_tag(
         named_params[param] = registry_info(named_params[param]).name
     elif callable(named_params[param]) and hasattr(named_params[param], "__name__"):
         named_params[param] = getattr(named_params[param], "__name__")
-    elif isinstance(named_params[param], dict | list):
+    elif isinstance(named_params[param], dict | list | BaseModel):
         named_params[param] = to_jsonable_python(
             named_params[param], fallback=lambda x: getattr(x, "__name__", None)
         )
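Note: registry_tag now routes BaseModel values (not just dict/list) through to_jsonable_python, with a fallback to an object's __name__ for values that cannot be serialized. A small sketch of that behaviour, assuming pydantic v2's pydantic_core.to_jsonable_python (which this module appears to use); the model, values, and printed results are illustrative.

    from pydantic import BaseModel
    from pydantic_core import to_jsonable_python


    class SandboxConfig(BaseModel):  # illustrative model
        image: str
        timeout: int = 30


    def scorer_fn() -> None:  # stands in for a non-serializable callable
        ...


    # BaseModel instances are converted to plain, JSON-compatible values
    print(to_jsonable_python(SandboxConfig(image="python:3.11")))
    # expected to print roughly: {'image': 'python:3.11', 'timeout': 30}

    # anything the serializer cannot handle falls back to its __name__
    print(
        to_jsonable_python(
            {"scorer": scorer_fn},
            fallback=lambda x: getattr(x, "__name__", None),
        )
    )
    # expected to print roughly: {'scorer': 'scorer_fn'}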
inspect_ai/_view/server.py
CHANGED
@@ -19,8 +19,8 @@ from inspect_ai.log._file import (
     EvalLogInfo,
     eval_log_json,
     list_eval_logs_async,
-
-
+    read_eval_log_async,
+    read_eval_log_headers_async,
 )

 from .notify import view_last_eval_time
@@ -60,7 +60,7 @@ def view_server(

         # header_only is based on a size threshold
         header_only = request.query.get("header-only", None)
-        return log_file_response(file, header_only)
+        return await log_file_response(file, header_only)

     @routes.get("/api/log-size/{log}")
     async def api_log_size(request: web.Request) -> web.Response:
@@ -180,7 +180,7 @@ def log_listing_response(logs: list[EvalLogInfo], log_dir: str) -> web.Response:
     return web.json_response(response)


-def log_file_response(file: str, header_only_param: str | None) -> web.Response:
+async def log_file_response(file: str, header_only_param: str | None) -> web.Response:
     # resolve header_only
     header_only_mb = int(header_only_param) if header_only_param is not None else None
     header_only = resolve_header_only(file, header_only_mb)
@@ -189,8 +189,8 @@ def log_file_response(file: str, header_only_param: str | None) -> web.Response:
     contents: bytes | None = None
     if header_only:
         try:
-            log =
-            contents = eval_log_json(log)
+            log = await read_eval_log_async(file, header_only=True)
+            contents = eval_log_json(log)
         except ValueError as ex:
             logger.info(
                 f"Unable to read headers from log file {file}: {ex}. "
@@ -198,8 +198,8 @@ def log_file_response(file: str, header_only_param: str | None) -> web.Response:
             )

     if contents is None:  # normal read
-        log =
-        contents = eval_log_json(log)
+        log = await read_eval_log_async(file, header_only=False)
+        contents = eval_log_json(log)

     return web.Response(body=contents, content_type="application/json")

@@ -245,7 +245,7 @@ async def log_bytes_response(log_file: str, start: int, end: int) -> web.Respons


 async def log_headers_response(files: list[str]) -> web.Response:
-    headers =
+    headers = await read_eval_log_headers_async(files)
     return web.json_response(to_jsonable_python(headers, exclude_none=True))

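Note: the view server handlers that read logs are now coroutines end to end, so reads go through the *_async log APIs instead of blocking aiohttp's event loop. A minimal aiohttp sketch of the same shape; the route, the read_headers stand-in, the port, and the payload are hypothetical rather than the package's actual endpoints.

    from aiohttp import web


    async def read_headers(files: list[str]) -> list[dict[str, str]]:
        # stand-in for an async log reader; pretend real I/O happens here
        return [{"file": f, "status": "success"} for f in files]


    routes = web.RouteTableDef()


    @routes.get("/api/log-headers")
    async def api_log_headers(request: web.Request) -> web.Response:
        files = request.query.getall("file", [])
        headers = await read_headers(files)  # await instead of blocking
        return web.json_response(headers)


    def main() -> None:
        app = web.Application()
        app.add_routes(routes)
        web.run_app(app, port=7575)


    if __name__ == "__main__":
        main()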
inspect_ai/_view/www/App.css
CHANGED
@@ -64,8 +64,8 @@ body[class^="vscode-"] {
   --bs-secondary-bg: var(--vscode-list-inactiveSelectionBackground);
   --bs-border-color: var(--vscode-editorGroup-border);
   --bs-card-border-color: var(--vscode-editorGroup-border);
-  --bs-warning-bg-subtle: var(--vscode-
-  --bs-warning-text-emphasis: var(--vscode-
+  --bs-warning-bg-subtle: var(--vscode-inputValidation-warningBackground);
+  --bs-warning-text-emphasis: var(--vscode-input-foreground);
   --inspect-find-background: var(--vscode-editorWidget-background);
   --inspect-find-foreground: var(--vscode-editorWidget-foreground);
   --inspect-input-background: var(--vscode-input-background);
inspect_ai/_view/www/dist/assets/index.css
CHANGED
@@ -14337,8 +14337,8 @@ body[class^="vscode-"] {
   --bs-secondary-bg: var(--vscode-list-inactiveSelectionBackground);
   --bs-border-color: var(--vscode-editorGroup-border);
   --bs-card-border-color: var(--vscode-editorGroup-border);
-  --bs-warning-bg-subtle: var(--vscode-
-  --bs-warning-text-emphasis: var(--vscode-
+  --bs-warning-bg-subtle: var(--vscode-inputValidation-warningBackground);
+  --bs-warning-text-emphasis: var(--vscode-input-foreground);
   --inspect-find-background: var(--vscode-editorWidget-background);
   --inspect-find-foreground: var(--vscode-editorWidget-foreground);
   --inspect-input-background: var(--vscode-input-background);