inspect-ai 0.3.52__py3-none-any.whl → 0.3.54__py3-none-any.whl
This diff compares two publicly released versions of the package as published to a supported public registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
- inspect_ai/_cli/eval.py +55 -1
- inspect_ai/_cli/main.py +2 -0
- inspect_ai/_cli/trace.py +244 -0
- inspect_ai/_display/core/progress.py +9 -3
- inspect_ai/_display/core/results.py +8 -4
- inspect_ai/_display/textual/app.py +5 -1
- inspect_ai/_display/textual/widgets/task_detail.py +3 -0
- inspect_ai/_display/textual/widgets/tasks.py +97 -6
- inspect_ai/_eval/eval.py +33 -0
- inspect_ai/_eval/evalset.py +4 -0
- inspect_ai/_eval/registry.py +2 -2
- inspect_ai/_eval/task/images.py +4 -14
- inspect_ai/_eval/task/results.py +22 -4
- inspect_ai/_eval/task/run.py +40 -20
- inspect_ai/_eval/task/sandbox.py +72 -43
- inspect_ai/_eval/task/task.py +4 -0
- inspect_ai/_eval/task/util.py +2 -0
- inspect_ai/_util/constants.py +3 -3
- inspect_ai/_util/display.py +1 -0
- inspect_ai/_util/logger.py +34 -8
- inspect_ai/_util/trace.py +275 -0
- inspect_ai/_view/www/App.css +13 -0
- inspect_ai/_view/www/dist/assets/index.css +13 -0
- inspect_ai/_view/www/dist/assets/index.js +80 -43
- inspect_ai/_view/www/src/App.mjs +31 -6
- inspect_ai/_view/www/src/Types.mjs +6 -0
- inspect_ai/_view/www/src/components/JsonPanel.mjs +11 -17
- inspect_ai/_view/www/src/components/MessageContent.mjs +9 -2
- inspect_ai/_view/www/src/components/Tools.mjs +46 -18
- inspect_ai/_view/www/src/navbar/Navbar.mjs +12 -0
- inspect_ai/_view/www/src/samples/SampleList.mjs +2 -2
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +2 -2
- inspect_ai/log/_log.py +6 -0
- inspect_ai/log/_message.py +2 -2
- inspect_ai/log/_recorders/eval.py +8 -18
- inspect_ai/log/_recorders/json.py +19 -17
- inspect_ai/model/_cache.py +22 -16
- inspect_ai/model/_call_tools.py +9 -1
- inspect_ai/model/_generate_config.py +8 -2
- inspect_ai/model/_model.py +11 -12
- inspect_ai/model/_providers/azureai.py +1 -1
- inspect_ai/model/_providers/bedrock.py +18 -2
- inspect_ai/model/_providers/hf.py +1 -1
- inspect_ai/model/_providers/openai.py +32 -8
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/model/_providers/vllm.py +1 -1
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +1 -1
- inspect_ai/util/_sandbox/context.py +7 -3
- inspect_ai/util/_sandbox/docker/compose.py +58 -19
- inspect_ai/util/_sandbox/docker/config.py +8 -10
- inspect_ai/util/_sandbox/docker/docker.py +20 -16
- inspect_ai/util/_sandbox/docker/util.py +3 -9
- inspect_ai/util/_sandbox/environment.py +7 -2
- inspect_ai/util/_sandbox/limits.py +1 -1
- inspect_ai/util/_sandbox/local.py +8 -9
- inspect_ai/util/_sandbox/service.py +17 -7
- inspect_ai/util/_subprocess.py +6 -1
- inspect_ai/util/_subtask.py +8 -2
- {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.54.dist-info}/METADATA +6 -8
- {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.54.dist-info}/RECORD +64 -62
- {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.54.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.54.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.54.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.54.dist-info}/top_level.txt +0 -0
inspect_ai/_eval/task/sandbox.py
CHANGED
@@ -1,7 +1,8 @@
 import asyncio
 import base64
 import contextlib
-from
+from random import random
+from typing import AsyncGenerator, Callable, NamedTuple, cast
 
 from inspect_ai._eval.task.task import Task
 from inspect_ai._eval.task.util import task_run_dir
@@ -9,6 +10,7 @@ from inspect_ai._util.file import file, filesystem
 from inspect_ai._util.registry import registry_unqualified_name
 from inspect_ai._util.url import data_uri_to_base64, is_data_uri
 from inspect_ai.dataset import Sample
+from inspect_ai.util._concurrency import concurrency
 from inspect_ai.util._sandbox.context import (
     cleanup_sandbox_environments_sample,
     init_sandbox_environments_sample,
@@ -18,12 +20,14 @@ from inspect_ai.util._sandbox.environment import (
     SandboxEnvironmentConfigType,
     SandboxEnvironmentSpec,
 )
+from inspect_ai.util._sandbox.registry import registry_find_sandboxenv
 
 
 @contextlib.asynccontextmanager
 async def sandboxenv_context(
     task_name: str,
     sandbox: SandboxEnvironmentSpec | None,
+    max_sandboxes: int | None,
     cleanup: bool,
     sample: Sample,
 ) -> AsyncGenerator[None, None]:
@@ -32,52 +36,77 @@ async def sandboxenv_context(
     if not sandbox:
         raise ValueError("sandboxenv_context called with no sandbox specified")
 
-    # read files from sample
-    files: dict[str, bytes] = {}
-    if sample.files:
-        for path, contents in sample.files.items():
-            files[path] = read_sandboxenv_file(contents)
-
-    # read setup script from sample (add bash shebang if necessary)
-    setup: bytes | None = None
-    if sample.setup:
-        setup = read_sandboxenv_file(sample.setup)
-        setup_str = setup.decode(encoding="utf-8")
-        if not setup_str.strip().startswith("#!"):
-            setup_str = f"#!/usr/bin/env bash\n\n{setup_str}"
-        setup = setup_str.encode(encoding="utf-8")
-
-    interrupted = False
-    environments: dict[str, SandboxEnvironment] | None = None
-    try:
-        # initialize sandbox environment,
-        environments = await init_sandbox_environments_sample(
-            type=sandbox.type,
-            task_name=registry_unqualified_name(task_name),
-            config=sandbox.config,
-            files=files,
-            setup=setup,
-            metadata=sample.metadata if sample.metadata else {},
-        )
-
-        # run sample
-        yield
-
-    except asyncio.CancelledError as ex:
-        interrupted = True
-        raise ex
+    # get sandboxenv_type
+    sandboxenv_type = registry_find_sandboxenv(sandbox.type)
 
-    finally:
-        # cleanup sandbox environment
-        if environments and cleanup:
-            await cleanup_sandbox_environments_sample(
-                type=sandbox.type,
-                task_name=task_name,
+    # see if there is a max_sandboxes in play (passed or from type)
+    if max_sandboxes is None:
+        default_concurrency_fn = cast(
+            Callable[[], int | None], getattr(sandboxenv_type, "default_concurrency")
+        )
+        max_sandboxes = default_concurrency_fn()
+
+    # if we are enforcing max_sandboxes, then when samples are scheduled they may
+    # not get interleaved properly across tasks (because the first task will come
+    # in and grab all of the sandboxes). Therefore, in this case we wait a random
+    # delay so that all tasks/samples have an equal shot at getting scheduled.
+    if max_sandboxes is not None:
+        await asyncio.sleep(random())
+
+    # enforce concurrency if required
+    sandboxes_cm = (
+        concurrency(sandbox.type, max_sandboxes, f"sandboxes/{sandbox.type}")
+        if max_sandboxes is not None
+        else contextlib.nullcontext()
+    )
+
+    async with sandboxes_cm:
+        # read files from sample
+        files: dict[str, bytes] = {}
+        if sample.files:
+            for path, contents in sample.files.items():
+                files[path] = read_sandboxenv_file(contents)
+
+        # read setup script from sample (add bash shebang if necessary)
+        setup: bytes | None = None
+        if sample.setup:
+            setup = read_sandboxenv_file(sample.setup)
+            setup_str = setup.decode(encoding="utf-8")
+            if not setup_str.strip().startswith("#!"):
+                setup_str = f"#!/usr/bin/env bash\n\n{setup_str}"
+            setup = setup_str.encode(encoding="utf-8")
+
+        interrupted = False
+        environments: dict[str, SandboxEnvironment] | None = None
+        try:
+            # initialize sandbox environment,
+            environments = await init_sandbox_environments_sample(
+                sandboxenv_type=sandboxenv_type,
+                task_name=registry_unqualified_name(task_name),
                 config=sandbox.config,
-                environments=environments,
-                interrupted=interrupted,
+                files=files,
+                setup=setup,
+                metadata=sample.metadata if sample.metadata else {},
             )
 
+            # run sample
+            yield
+
+        except asyncio.CancelledError as ex:
+            interrupted = True
+            raise ex
+
+        finally:
+            # cleanup sandbox environment
+            if environments and cleanup:
+                await cleanup_sandbox_environments_sample(
+                    type=sandbox.type,
+                    task_name=task_name,
+                    config=sandbox.config,
+                    environments=environments,
+                    interrupted=interrupted,
+                )
+
 
 def read_sandboxenv_file(contents: str) -> bytes:
     if is_data_uri(contents):
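Note on the max_sandboxes change above: the gating pattern is (1) resolve a per-sandbox-type limit, (2) sleep a random sub-second delay so the first task cannot claim every slot, and (3) enter a concurrency context only when a limit is in force. A minimal sketch of that pattern, using an asyncio.Semaphore as a stand-in for the concurrency() helper used in the real code (the semaphore registry and run_sample below are illustrative only; nullcontext works as an async context manager on Python 3.10+):

import asyncio
import contextlib
from random import random

# Illustrative stand-in for the concurrency() helper: one named semaphore
# shared by every sandbox of the same type.
_slots: dict[str, asyncio.Semaphore] = {}


def sandbox_slots(name: str, limit: int) -> asyncio.Semaphore:
    return _slots.setdefault(name, asyncio.Semaphore(limit))


async def run_sample(sandbox_type: str, max_sandboxes: int | None) -> None:
    # random stagger so all tasks/samples get an equal shot at a slot
    if max_sandboxes is not None:
        await asyncio.sleep(random())

    # enforce the limit only when one is in force
    cm = (
        sandbox_slots(sandbox_type, max_sandboxes)
        if max_sandboxes is not None
        else contextlib.nullcontext()
    )
    async with cm:
        ...  # init sandbox, run the sample, clean up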
inspect_ai/_eval/task/task.py
CHANGED
@@ -39,6 +39,8 @@ class Task:
 
     Args:
         dataset (Dataset | Sequence[Sample]): Dataset to evaluate
+        setup: (Solver | list[Solver] | None): Setup step (always run
+          even when the main `solver` is replaced).
         solver: (Solver | list[Solver]): Solver or list of solvers.
           Defaults to generate(), a normal call to the model.
         scorer: (Scorer | list[Scorer] | None): Scorer used to evaluate model output.
@@ -68,6 +70,7 @@ class Task:
     def __init__(
         self,
         dataset: Dataset | Sequence[Sample] | None = None,
+        setup: Solver | list[Solver] | None = None,
         solver: Solver | list[Solver] = generate(),
         scorer: Scorer | list[Scorer] | None = None,
         metrics: list[Metric] | dict[str, list[Metric]] | None = None,
@@ -119,6 +122,7 @@ class Task:
         self.dataset: Dataset = (
             dataset if isinstance(dataset, Dataset) else MemoryDataset(list(dataset))
         )
+        self.setup = setup
         self.solver = chain(solver) if isinstance(solver, list) else solver
         self.scorer = (
             scorer
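The new setup argument accepts a solver (or list of solvers) that runs before the main solver and is preserved even when the main solver is replaced at eval time. An illustrative task using it (the task name, dataset, prompt, and scorer here are placeholders, not taken from the package):

from inspect_ai import Task, task
from inspect_ai.dataset import Sample
from inspect_ai.scorer import match
from inspect_ai.solver import generate, system_message


@task
def addition() -> Task:
    return Task(
        dataset=[Sample(input="What is 1 + 1?", target="2")],
        # setup always runs, even if the main solver is replaced
        setup=system_message("Answer with the number only."),
        solver=generate(),
        scorer=match(),
    )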
inspect_ai/_eval/task/util.py
CHANGED
@@ -42,6 +42,8 @@ def slice_dataset(
     sample_id: str | int | list[str | int] | None,
 ) -> Dataset:
     def normalise(id: str | int | None) -> str:
+        if isinstance(id, str) and id.isdigit():
+            id = int(id)
         return id if isinstance(id, str) else str(id).zfill(20)
 
     if sample_id is not None:
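The two added lines make a digit-only string sample id (for example, a value passed on the command line) normalise the same way as an integer id, so "5" and 5 now select the same sample. A standalone illustration of the updated normalise:

def normalise(id: str | int | None) -> str:
    # digit-only strings are treated as integer ids
    if isinstance(id, str) and id.isdigit():
        id = int(id)
    return id if isinstance(id, str) else str(id).zfill(20)


assert normalise(5) == normalise("5")  # both zero-padded to 20 digits
assert normalise("abc") == "abc"       # non-numeric ids pass through unchanged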
inspect_ai/_util/constants.py
CHANGED
@@ -14,12 +14,12 @@ DEFAULT_VIEW_PORT = 7575
 DEFAULT_SERVER_HOST = "127.0.0.1"
 HTTP = 15
 HTTP_LOG_LEVEL = "HTTP"
-
-
+TRACE = 13
+TRACE_LOG_LEVEL = "TRACE"
 ALL_LOG_LEVELS = [
     "DEBUG",
+    TRACE_LOG_LEVEL,
     HTTP_LOG_LEVEL,
-    SANDBOX_LOG_LEVEL,
     "INFO",
     "WARNING",
     "ERROR",
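For orientation, the new TRACE level (13) sits between the stdlib DEBUG (10) and the package's custom HTTP level (15), so a logger set to TRACE also captures HTTP, INFO, and WARNING records while still filtering DEBUG. A stdlib-only sketch of that ordering (the real registration happens in _util/logger.py):

import logging

# DEBUG=10 < TRACE=13 < HTTP=15 < INFO=20 < WARNING=30
logging.addLevelName(13, "TRACE")
logging.addLevelName(15, "HTTP")
logging.basicConfig(level=13)

log = logging.getLogger("demo")
log.log(13, "a TRACE message")  # shown: 13 >= 13
log.log(15, "an HTTP message")  # shown: 15 >= 13
log.debug("a DEBUG message")    # dropped: 10 < 13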
inspect_ai/_util/display.py
CHANGED
@@ -14,6 +14,7 @@ _display_type: DisplayType | None = None
 
 def init_display_type(display: str | None = None) -> DisplayType:
     global _display_type
+    global _display_metrics
     display = (
         display or os.environ.get("INSPECT_DISPLAY", DEFAULT_DISPLAY).lower().strip()
     )
inspect_ai/_util/logger.py
CHANGED
@@ -11,6 +11,7 @@ from logging import (
     getLevelName,
     getLogger,
 )
+from pathlib import Path
 
 import rich
 from rich.console import ConsoleRenderable
@@ -18,17 +19,20 @@ from rich.logging import RichHandler
 from rich.text import Text
 from typing_extensions import override
 
-from
+from .constants import (
     ALL_LOG_LEVELS,
     DEFAULT_LOG_LEVEL,
     DEFAULT_LOG_LEVEL_TRANSCRIPT,
     HTTP,
     HTTP_LOG_LEVEL,
     PKG_NAME,
-
-
+    TRACE,
+    TRACE_LOG_LEVEL,
 )
-from
+from .error import PrerequisiteError
+from .trace import TraceFileHandler, TraceFormatter, inspect_trace_dir
+
+TRACE_FILE_NAME = "trace.log"
 
 
 # log handler that filters messages to stderr and the log file
@@ -52,6 +56,24 @@ class LogHandler(RichHandler):
         else:
             self.file_logger_level = 0
 
+        # add a trace handler
+        default_trace_file = inspect_trace_dir() / TRACE_FILE_NAME
+        have_existing_trace_file = default_trace_file.exists()
+        env_trace_file = os.environ.get("INSPECT_TRACE_FILE", None)
+        trace_file = Path(env_trace_file) if env_trace_file else default_trace_file
+        trace_total_files = 10
+        self.trace_logger = TraceFileHandler(
+            trace_file.as_posix(),
+            backupCount=trace_total_files - 1,  # exclude the current file (10 total)
+        )
+        self.trace_logger.setFormatter(TraceFormatter())
+        if have_existing_trace_file:
+            self.trace_logger.doRollover()
+
+        # set trace level
+        trace_level = os.environ.get("INSPECT_TRACE_LEVEL", TRACE_LOG_LEVEL)
+        self.trace_logger_level = int(getLevelName(trace_level.upper()))
+
     @override
     def emit(self, record: LogRecord) -> None:
         # demote httpx and return notifications to log_level http
@@ -79,6 +101,10 @@ class LogHandler(RichHandler):
         ):
             self.file_logger.emit(record)
 
+        # write to trace if the trace level matches.
+        if self.trace_logger and record.levelno >= self.trace_logger_level:
+            self.trace_logger.emit(record)
+
         # eval log always gets info level and higher records
         # eval log only gets debug or http if we opt-in
         write = record.levelno >= self.transcript_levelno
@@ -95,12 +121,12 @@ def init_logger(
     log_level: str | None = None, log_level_transcript: str | None = None
 ) -> None:
     # backwards compatibility for 'tools'
-    if log_level == "tools":
-        log_level = "
+    if log_level == "sandbox" or log_level == "tools":
+        log_level = "trace"
 
     # register http and tools levels
     addLevelName(HTTP, HTTP_LOG_LEVEL)
-    addLevelName(
+    addLevelName(TRACE, TRACE_LOG_LEVEL)
 
     def validate_level(option: str, level: str) -> None:
         if level not in ALL_LOG_LEVELS:
@@ -134,7 +160,7 @@ def init_logger(
     getLogger().addHandler(_logHandler)
 
     # establish default capture level
-    capture_level = min(
+    capture_level = min(TRACE, levelno)
 
     # see all the messages (we won't actually display/write all of them)
     getLogger().setLevel(capture_level)
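Both environment variables referenced above are read when the log handler is constructed, so they have to be set before inspect initializes logging. A hypothetical configuration (the path and level values are examples only):

import os

# Redirect the rotating trace log away from the default <data dir>/traces/trace.log
os.environ["INSPECT_TRACE_FILE"] = "/tmp/inspect-trace.log"
# Capture DEBUG records into the trace file as well (default threshold is TRACE)
os.environ["INSPECT_TRACE_LEVEL"] = "DEBUG"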
inspect_ai/_util/trace.py
ADDED
@@ -0,0 +1,275 @@
+import asyncio
+import datetime
+import gzip
+import json
+import logging
+import os
+import shutil
+import time
+import traceback
+from contextlib import contextmanager
+from logging import Logger
+from logging.handlers import RotatingFileHandler
+from pathlib import Path
+from typing import Any, Generator, Literal, TextIO
+
+import jsonlines
+from pydantic import BaseModel, Field, JsonValue
+from shortuuid import uuid
+
+from .appdirs import inspect_data_dir
+from .constants import TRACE
+
+
+def inspect_trace_dir() -> Path:
+    return inspect_data_dir("traces")
+
+
+@contextmanager
+def trace_action(
+    logger: Logger, action: str, message: str, *args: Any, **kwargs: Any
+) -> Generator[None, None, None]:
+    trace_id = uuid()
+    start_monotonic = time.monotonic()
+    start_wall = time.time()
+    pid = os.getpid()
+    detail = message % args if args else message % kwargs if kwargs else message
+
+    def trace_message(event: str) -> str:
+        return f"{action}: {detail} ({event})"
+
+    logger.log(
+        TRACE,
+        trace_message("enter"),
+        extra={
+            "action": action,
+            "detail": detail,
+            "event": "enter",
+            "trace_id": str(trace_id),
+            "start_time": start_wall,
+            "pid": pid,
+        },
+    )
+
+    try:
+        yield
+        duration = time.monotonic() - start_monotonic
+        logger.log(
+            TRACE,
+            trace_message("exit"),
+            extra={
+                "action": action,
+                "detail": detail,
+                "event": "exit",
+                "trace_id": str(trace_id),
+                "duration": duration,
+                "pid": pid,
+            },
+        )
+    except (KeyboardInterrupt, asyncio.CancelledError):
+        duration = time.monotonic() - start_monotonic
+        logger.log(
+            TRACE,
+            trace_message("cancel"),
+            extra={
+                "action": action,
+                "detail": detail,
+                "event": "cancel",
+                "trace_id": str(trace_id),
+                "duration": duration,
+                "pid": pid,
+            },
+        )
+        raise
+    except TimeoutError:
+        duration = time.monotonic() - start_monotonic
+        logger.log(
+            TRACE,
+            trace_message("timeout"),
+            extra={
+                "action": action,
+                "detail": detail,
+                "event": "timeout",
+                "trace_id": str(trace_id),
+                "duration": duration,
+                "pid": pid,
+            },
+        )
+        raise
+    except Exception as ex:
+        duration = time.monotonic() - start_monotonic
+        logger.log(
+            TRACE,
+            trace_message("error"),
+            extra={
+                "action": action,
+                "detail": detail,
+                "event": "error",
+                "trace_id": str(trace_id),
+                "duration": duration,
+                "error": getattr(ex, "message", str(ex)) or repr(ex),
+                "error_type": type(ex).__name__,
+                "stacktrace": traceback.format_exc(),
+                "pid": pid,
+            },
+        )
+        raise
+
+
+def trace_message(
+    logger: Logger, category: str, message: str, *args: Any, **kwargs: Any
+) -> None:
+    logger.log(TRACE, f"[{category}] {message}", *args, **kwargs)
+
+
+class TraceFormatter(logging.Formatter):
+    def format(self, record: logging.LogRecord) -> str:
+        # Base log entry with standard fields
+        output: dict[str, JsonValue] = {
+            "timestamp": self.formatTime(record),
+            "level": record.levelname,
+            "message": record.getMessage(),  # This handles the % formatting of the message
+        }
+
+        # Add basic context if it's not a TRACE message
+        if record.levelname != "TRACE":
+            if hasattr(record, "module"):
+                output["module"] = record.module
+            if hasattr(record, "funcName"):
+                output["function"] = record.funcName
+            if hasattr(record, "lineno"):
+                output["line"] = record.lineno
+
+        # Add any structured fields from extra
+        elif hasattr(record, "action"):
+            # This is a trace_action log
+            for key in [
+                "action",
+                "detail",
+                "event",
+                "trace_id",
+                "start_time",
+                "duration",
+                "error",
+                "error_type",
+                "stacktrace",
+                "pid",
+            ]:
+                if hasattr(record, key):
+                    output[key] = getattr(record, key)
+
+        # Handle any unexpected extra attributes
+        for key, value in record.__dict__.items():
+            if key not in output and key not in (
+                "args",
+                "lineno",
+                "funcName",
+                "module",
+                "asctime",
+                "created",
+                "exc_info",
+                "exc_text",
+                "filename",
+                "levelno",
+                "levelname",
+                "msecs",
+                "msg",
+                "name",
+                "pathname",
+                "process",
+                "processName",
+                "relativeCreated",
+                "stack_info",
+                "thread",
+                "threadName",
+            ):
+                output[key] = value
+
+        return json.dumps(
+            output, default=str
+        )  # default=str handles non-serializable objects
+
+    def formatTime(self, record: logging.LogRecord, datefmt: str | None = None) -> str:
+        # ISO format with timezone
+        dt = datetime.datetime.fromtimestamp(record.created)
+        return dt.isoformat()
+
+
+class TraceRecord(BaseModel):
+    timestamp: str
+    level: str
+    message: str
+
+
+class SimpleTraceRecord(TraceRecord):
+    action: None = Field(default=None)
+
+
+class ActionTraceRecord(TraceRecord):
+    action: str
+    event: Literal["enter", "cancel", "error", "timeout", "exit"]
+    trace_id: str
+    detail: str = Field(default="")
+    start_time: float | None = Field(default=None)
+    duration: float | None = Field(default=None)
+    error: str | None = Field(default=None)
+    error_type: str | None = Field(default=None)
+    stacktrace: str | None = Field(default=None)
+    pid: int | None = Field(default=None)
+
+
+def read_trace_file(file: Path) -> list[TraceRecord]:
+    def read_file(f: TextIO) -> list[TraceRecord]:
+        jsonlines_reader = jsonlines.Reader(f)
+        trace_records: list[TraceRecord] = []
+        for trace in jsonlines_reader.iter(type=dict):
+            if "action" in trace:
+                trace_records.append(ActionTraceRecord(**trace))
+            else:
+                trace_records.append(SimpleTraceRecord(**trace))
+        return trace_records
+
+    if file.name.endswith(".gz"):
+        with gzip.open(file, "rt") as f:
+            return read_file(f)
+    else:
+        with open(file, "r") as f:
+            return read_file(f)
+
+
+class TraceFileHandler(RotatingFileHandler):
+    def __init__(
+        self,
+        filename: str,
+        mode: str = "a",
+        maxBytes: int = 0,
+        backupCount: int = 0,
+        encoding: str | None = None,
+        delay: bool = False,
+    ) -> None:
+        super().__init__(filename, mode, maxBytes, backupCount, encoding, delay)
+
+    def rotation_filename(self, default_name: str) -> str:
+        """
+        Returns the name of the rotated file.
+
+        Args:
+            default_name: The default name that would be used for rotation
+
+        Returns:
+            The modified filename with .gz extension
+        """
+        return default_name + ".gz"
+
+    def rotate(self, source: str, dest: str) -> None:
+        """
+        Compresses the source file and moves it to destination.
+
+        Args:
+            source: The source file to be compressed
+            dest: The destination path for the compressed file
+        """
+        with open(source, "rb") as f_in:
+            with gzip.open(dest, "wb") as f_out:
+                shutil.copyfileobj(f_in, f_out)
+        os.remove(source)
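A sketch of how the helpers in this new module fit together, based on the definitions above (the module is private, so the import path may change; start_services is a hypothetical placeholder):

import logging
from pathlib import Path

from inspect_ai._util.trace import read_trace_file, trace_action, trace_message

logger = logging.getLogger(__name__)

# wrap a long-running operation: matching enter/exit (or cancel/timeout/error)
# records share a trace_id and report a duration
with trace_action(logger, "compose up", "project %s", "my-project"):
    start_services()  # hypothetical placeholder

# one-off categorized message
trace_message(logger, "HTTP", "retrying request to %s", "https://example.com")

# later: read a (possibly gzip-rotated) trace file back into pydantic records
for record in read_trace_file(Path("trace.log")):
    print(record.level, record.message)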
inspect_ai/_view/www/App.css
CHANGED
@@ -711,6 +711,19 @@ pre[class*="language-"].tool-output,
   background-color: #333333;
 }
 
+pre[class*="language-"].tool-output {
+  border: none !important;
+  box-shadow: none !important;
+  border-radius: var(--bs-border-radius) !important;
+}
+
+.vscode-dark pre.jsonPanel {
+  background: none !important;
+  border: none !important;
+  box-shadow: none !important;
+  border-radius: var(--bs-border-radius) !important;
+}
+
 
 /* jsondiffpatch */
 
inspect_ai/_view/www/dist/assets/index.css
CHANGED
@@ -14984,6 +14984,19 @@ pre[class*="language-"].tool-output,
   background-color: #333333;
 }
 
+pre[class*="language-"].tool-output {
+  border: none !important;
+  box-shadow: none !important;
+  border-radius: var(--bs-border-radius) !important;
+}
+
+.vscode-dark pre.jsonPanel {
+  background: none !important;
+  border: none !important;
+  box-shadow: none !important;
+  border-radius: var(--bs-border-radius) !important;
+}
+
 
 /* jsondiffpatch */
 