inspect-ai 0.3.52__py3-none-any.whl → 0.3.53__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +29 -0
- inspect_ai/_display/core/progress.py +9 -3
- inspect_ai/_display/core/results.py +8 -4
- inspect_ai/_display/textual/widgets/task_detail.py +3 -0
- inspect_ai/_display/textual/widgets/tasks.py +86 -5
- inspect_ai/_eval/eval.py +16 -0
- inspect_ai/_eval/evalset.py +4 -0
- inspect_ai/_eval/registry.py +2 -2
- inspect_ai/_eval/task/results.py +22 -4
- inspect_ai/_eval/task/run.py +14 -10
- inspect_ai/_eval/task/sandbox.py +72 -43
- inspect_ai/_eval/task/task.py +4 -0
- inspect_ai/_eval/task/util.py +2 -0
- inspect_ai/_view/www/App.css +13 -0
- inspect_ai/_view/www/dist/assets/index.css +13 -0
- inspect_ai/_view/www/dist/assets/index.js +80 -43
- inspect_ai/_view/www/src/App.mjs +31 -6
- inspect_ai/_view/www/src/Types.mjs +6 -0
- inspect_ai/_view/www/src/components/JsonPanel.mjs +11 -17
- inspect_ai/_view/www/src/components/MessageContent.mjs +9 -2
- inspect_ai/_view/www/src/components/Tools.mjs +46 -18
- inspect_ai/_view/www/src/navbar/Navbar.mjs +12 -0
- inspect_ai/_view/www/src/samples/SampleList.mjs +2 -2
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +2 -2
- inspect_ai/log/_log.py +3 -0
- inspect_ai/log/_recorders/eval.py +8 -7
- inspect_ai/model/_generate_config.py +6 -0
- inspect_ai/model/_providers/azureai.py +1 -1
- inspect_ai/model/_providers/bedrock.py +17 -1
- inspect_ai/model/_providers/hf.py +1 -1
- inspect_ai/model/_providers/openai.py +32 -8
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/model/_providers/vllm.py +1 -1
- inspect_ai/util/_sandbox/context.py +1 -2
- inspect_ai/util/_sandbox/docker/config.py +8 -10
- inspect_ai/util/_sandbox/docker/docker.py +9 -5
- inspect_ai/util/_sandbox/docker/util.py +3 -3
- inspect_ai/util/_sandbox/environment.py +7 -2
- inspect_ai/util/_sandbox/limits.py +1 -1
- inspect_ai/util/_sandbox/local.py +8 -9
- {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.53.dist-info}/METADATA +1 -3
- {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.53.dist-info}/RECORD +46 -46
- {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.53.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.53.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.53.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.53.dist-info}/top_level.txt +0 -0
inspect_ai/_eval/task/sandbox.py
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
import asyncio
|
2
2
|
import base64
|
3
3
|
import contextlib
|
4
|
-
from
|
4
|
+
from random import random
|
5
|
+
from typing import AsyncGenerator, Callable, NamedTuple, cast
|
5
6
|
|
6
7
|
from inspect_ai._eval.task.task import Task
|
7
8
|
from inspect_ai._eval.task.util import task_run_dir
|
@@ -9,6 +10,7 @@ from inspect_ai._util.file import file, filesystem
|
|
9
10
|
from inspect_ai._util.registry import registry_unqualified_name
|
10
11
|
from inspect_ai._util.url import data_uri_to_base64, is_data_uri
|
11
12
|
from inspect_ai.dataset import Sample
|
13
|
+
from inspect_ai.util._concurrency import concurrency
|
12
14
|
from inspect_ai.util._sandbox.context import (
|
13
15
|
cleanup_sandbox_environments_sample,
|
14
16
|
init_sandbox_environments_sample,
|
@@ -18,12 +20,14 @@ from inspect_ai.util._sandbox.environment import (
|
|
18
20
|
SandboxEnvironmentConfigType,
|
19
21
|
SandboxEnvironmentSpec,
|
20
22
|
)
|
23
|
+
from inspect_ai.util._sandbox.registry import registry_find_sandboxenv
|
21
24
|
|
22
25
|
|
23
26
|
@contextlib.asynccontextmanager
|
24
27
|
async def sandboxenv_context(
|
25
28
|
task_name: str,
|
26
29
|
sandbox: SandboxEnvironmentSpec | None,
|
30
|
+
max_sandboxes: int | None,
|
27
31
|
cleanup: bool,
|
28
32
|
sample: Sample,
|
29
33
|
) -> AsyncGenerator[None, None]:
|
@@ -32,52 +36,77 @@ async def sandboxenv_context(
|
|
32
36
|
if not sandbox:
|
33
37
|
raise ValueError("sandboxenv_context called with no sandbox specified")
|
34
38
|
|
35
|
-
#
|
36
|
-
|
37
|
-
if sample.files:
|
38
|
-
for path, contents in sample.files.items():
|
39
|
-
files[path] = read_sandboxenv_file(contents)
|
40
|
-
|
41
|
-
# read setup script from sample (add bash shebang if necessary)
|
42
|
-
setup: bytes | None = None
|
43
|
-
if sample.setup:
|
44
|
-
setup = read_sandboxenv_file(sample.setup)
|
45
|
-
setup_str = setup.decode(encoding="utf-8")
|
46
|
-
if not setup_str.strip().startswith("#!"):
|
47
|
-
setup_str = f"#!/usr/bin/env bash\n\n{setup_str}"
|
48
|
-
setup = setup_str.encode(encoding="utf-8")
|
49
|
-
|
50
|
-
interrupted = False
|
51
|
-
environments: dict[str, SandboxEnvironment] | None = None
|
52
|
-
try:
|
53
|
-
# initialize sandbox environment,
|
54
|
-
environments = await init_sandbox_environments_sample(
|
55
|
-
type=sandbox.type,
|
56
|
-
task_name=registry_unqualified_name(task_name),
|
57
|
-
config=sandbox.config,
|
58
|
-
files=files,
|
59
|
-
setup=setup,
|
60
|
-
metadata=sample.metadata if sample.metadata else {},
|
61
|
-
)
|
62
|
-
|
63
|
-
# run sample
|
64
|
-
yield
|
65
|
-
|
66
|
-
except asyncio.CancelledError as ex:
|
67
|
-
interrupted = True
|
68
|
-
raise ex
|
39
|
+
# get sandboxenv_type
|
40
|
+
sandboxenv_type = registry_find_sandboxenv(sandbox.type)
|
69
41
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
42
|
+
# see if there is a max_sandboxes in play (passed or from type)
|
43
|
+
if max_sandboxes is None:
|
44
|
+
default_concurrency_fn = cast(
|
45
|
+
Callable[[], int | None], getattr(sandboxenv_type, "default_concurrency")
|
46
|
+
)
|
47
|
+
max_sandboxes = default_concurrency_fn()
|
48
|
+
|
49
|
+
# if we are enforcing max_sandboxes, then when samples are scheduled they may
|
50
|
+
# not get interleaved properly across tasks (because the first task will come
|
51
|
+
# in and grab all of the sandboxes). Therefore, in this case we wait a random
|
52
|
+
# delay so that all tasks/samples have an equal shot at getting scheduled.
|
53
|
+
if max_sandboxes is not None:
|
54
|
+
await asyncio.sleep(random())
|
55
|
+
|
56
|
+
# enforce concurrency if required
|
57
|
+
sandboxes_cm = (
|
58
|
+
concurrency(sandbox.type, max_sandboxes, f"sandboxes/{sandbox.type}")
|
59
|
+
if max_sandboxes is not None
|
60
|
+
else contextlib.nullcontext()
|
61
|
+
)
|
62
|
+
|
63
|
+
async with sandboxes_cm:
|
64
|
+
# read files from sample
|
65
|
+
files: dict[str, bytes] = {}
|
66
|
+
if sample.files:
|
67
|
+
for path, contents in sample.files.items():
|
68
|
+
files[path] = read_sandboxenv_file(contents)
|
69
|
+
|
70
|
+
# read setup script from sample (add bash shebang if necessary)
|
71
|
+
setup: bytes | None = None
|
72
|
+
if sample.setup:
|
73
|
+
setup = read_sandboxenv_file(sample.setup)
|
74
|
+
setup_str = setup.decode(encoding="utf-8")
|
75
|
+
if not setup_str.strip().startswith("#!"):
|
76
|
+
setup_str = f"#!/usr/bin/env bash\n\n{setup_str}"
|
77
|
+
setup = setup_str.encode(encoding="utf-8")
|
78
|
+
|
79
|
+
interrupted = False
|
80
|
+
environments: dict[str, SandboxEnvironment] | None = None
|
81
|
+
try:
|
82
|
+
# initialize sandbox environment,
|
83
|
+
environments = await init_sandbox_environments_sample(
|
84
|
+
sandboxenv_type=sandboxenv_type,
|
85
|
+
task_name=registry_unqualified_name(task_name),
|
76
86
|
config=sandbox.config,
|
77
|
-
|
78
|
-
|
87
|
+
files=files,
|
88
|
+
setup=setup,
|
89
|
+
metadata=sample.metadata if sample.metadata else {},
|
79
90
|
)
|
80
91
|
|
92
|
+
# run sample
|
93
|
+
yield
|
94
|
+
|
95
|
+
except asyncio.CancelledError as ex:
|
96
|
+
interrupted = True
|
97
|
+
raise ex
|
98
|
+
|
99
|
+
finally:
|
100
|
+
# cleanup sandbox environment
|
101
|
+
if environments and cleanup:
|
102
|
+
await cleanup_sandbox_environments_sample(
|
103
|
+
type=sandbox.type,
|
104
|
+
task_name=task_name,
|
105
|
+
config=sandbox.config,
|
106
|
+
environments=environments,
|
107
|
+
interrupted=interrupted,
|
108
|
+
)
|
109
|
+
|
81
110
|
|
82
111
|
def read_sandboxenv_file(contents: str) -> bytes:
|
83
112
|
if is_data_uri(contents):
|
inspect_ai/_eval/task/task.py
CHANGED
@@ -39,6 +39,8 @@ class Task:
|
|
39
39
|
|
40
40
|
Args:
|
41
41
|
dataset (Dataset | Sequence[Sample]): Dataset to evaluate
|
42
|
+
setup: (Solver | list[Solver] | None): Setup step (always run
|
43
|
+
even when the main `solver` is replaced).
|
42
44
|
solver: (Solver | list[Solver]): Solver or list of solvers.
|
43
45
|
Defaults to generate(), a normal call to the model.
|
44
46
|
scorer: (Scorer | list[Scorer] | None): Scorer used to evaluate model output.
|
@@ -68,6 +70,7 @@ class Task:
|
|
68
70
|
def __init__(
|
69
71
|
self,
|
70
72
|
dataset: Dataset | Sequence[Sample] | None = None,
|
73
|
+
setup: Solver | list[Solver] | None = None,
|
71
74
|
solver: Solver | list[Solver] = generate(),
|
72
75
|
scorer: Scorer | list[Scorer] | None = None,
|
73
76
|
metrics: list[Metric] | dict[str, list[Metric]] | None = None,
|
@@ -119,6 +122,7 @@ class Task:
|
|
119
122
|
self.dataset: Dataset = (
|
120
123
|
dataset if isinstance(dataset, Dataset) else MemoryDataset(list(dataset))
|
121
124
|
)
|
125
|
+
self.setup = setup
|
122
126
|
self.solver = chain(solver) if isinstance(solver, list) else solver
|
123
127
|
self.scorer = (
|
124
128
|
scorer
|
inspect_ai/_eval/task/util.py
CHANGED
@@ -42,6 +42,8 @@ def slice_dataset(
|
|
42
42
|
sample_id: str | int | list[str | int] | None,
|
43
43
|
) -> Dataset:
|
44
44
|
def normalise(id: str | int | None) -> str:
|
45
|
+
if isinstance(id, str) and id.isdigit():
|
46
|
+
id = int(id)
|
45
47
|
return id if isinstance(id, str) else str(id).zfill(20)
|
46
48
|
|
47
49
|
if sample_id is not None:
|
inspect_ai/_view/www/App.css
CHANGED
@@ -711,6 +711,19 @@ pre[class*="language-"].tool-output,
|
|
711
711
|
background-color: #333333;
|
712
712
|
}
|
713
713
|
|
714
|
+
pre[class*="language-"].tool-output {
|
715
|
+
border: none !important;
|
716
|
+
box-shadow: none !important;
|
717
|
+
border-radius: var(--bs-border-radius) !important;
|
718
|
+
}
|
719
|
+
|
720
|
+
.vscode-dark pre.jsonPanel {
|
721
|
+
background: none !important;
|
722
|
+
border: none !important;
|
723
|
+
box-shadow: none !important;
|
724
|
+
border-radius: var(--bs-border-radius) !important;
|
725
|
+
}
|
726
|
+
|
714
727
|
|
715
728
|
/* jsondiffpatch */
|
716
729
|
|
@@ -14984,6 +14984,19 @@ pre[class*="language-"].tool-output,
|
|
14984
14984
|
background-color: #333333;
|
14985
14985
|
}
|
14986
14986
|
|
14987
|
+
pre[class*="language-"].tool-output {
|
14988
|
+
border: none !important;
|
14989
|
+
box-shadow: none !important;
|
14990
|
+
border-radius: var(--bs-border-radius) !important;
|
14991
|
+
}
|
14992
|
+
|
14993
|
+
.vscode-dark pre.jsonPanel {
|
14994
|
+
background: none !important;
|
14995
|
+
border: none !important;
|
14996
|
+
box-shadow: none !important;
|
14997
|
+
border-radius: var(--bs-border-radius) !important;
|
14998
|
+
}
|
14999
|
+
|
14987
15000
|
|
14988
15001
|
/* jsondiffpatch */
|
14989
15002
|
|
@@ -15251,21 +15251,10 @@ const ToolCallView = ({
|
|
15251
15251
|
output,
|
15252
15252
|
mode
|
15253
15253
|
}) => {
|
15254
|
-
const icon = mode === "compact" ? "" : m$1`<i
|
15255
|
-
class="bi bi-tools"
|
15256
|
-
style=${{
|
15257
|
-
marginRight: "0.2rem",
|
15258
|
-
opacity: "0.4"
|
15259
|
-
}}
|
15260
|
-
></i>`;
|
15261
|
-
const codeIndent = mode === "compact" ? "" : "";
|
15262
15254
|
return m$1`<div>
|
15263
|
-
${
|
15264
|
-
${!view || view.title ? m$1`<code style=${{ fontSize: FontSize.small }}
|
15265
|
-
>${(view == null ? void 0 : view.title) || functionCall}</code
|
15266
|
-
>` : ""}
|
15255
|
+
${mode !== "compact" && (!view || view.title) ? m$1`<${ToolTitle} title=${(view == null ? void 0 : view.title) || functionCall} />` : ""}
|
15267
15256
|
<div>
|
15268
|
-
<div
|
15257
|
+
<div>
|
15269
15258
|
<${ToolInput}
|
15270
15259
|
type=${inputType}
|
15271
15260
|
contents=${input}
|
@@ -15274,12 +15263,39 @@ const ToolCallView = ({
|
|
15274
15263
|
/>
|
15275
15264
|
${output ? m$1`
|
15276
15265
|
<${ExpandablePanel} collapse=${true} border=${true} lines=${15}>
|
15277
|
-
<${MessageContent} contents=${output} />
|
15266
|
+
<${MessageContent} contents=${normalizeContent$1(output)} />
|
15278
15267
|
</${ExpandablePanel}>` : ""}
|
15279
15268
|
</div>
|
15280
15269
|
</div>
|
15281
15270
|
</div>`;
|
15282
15271
|
};
|
15272
|
+
const ToolTitle = ({ title }) => {
|
15273
|
+
return m$1` <i
|
15274
|
+
class="bi bi-tools"
|
15275
|
+
style=${{
|
15276
|
+
marginRight: "0.2rem",
|
15277
|
+
opacity: "0.4"
|
15278
|
+
}}
|
15279
|
+
></i>
|
15280
|
+
<code style=${{ fontSize: FontSize.small }}>${title}</code>`;
|
15281
|
+
};
|
15282
|
+
const normalizeContent$1 = (output) => {
|
15283
|
+
if (Array.isArray(output)) {
|
15284
|
+
return output;
|
15285
|
+
} else {
|
15286
|
+
return [
|
15287
|
+
{
|
15288
|
+
type: "tool",
|
15289
|
+
content: [
|
15290
|
+
{
|
15291
|
+
type: "text",
|
15292
|
+
text: String(output)
|
15293
|
+
}
|
15294
|
+
]
|
15295
|
+
}
|
15296
|
+
];
|
15297
|
+
}
|
15298
|
+
};
|
15283
15299
|
const ToolInput = ({ type, contents, view, style }) => {
|
15284
15300
|
if (!contents && !(view == null ? void 0 : view.content)) {
|
15285
15301
|
return "";
|
@@ -15455,8 +15471,7 @@ const extractInput = (inputKey, args) => {
|
|
15455
15471
|
args: []
|
15456
15472
|
};
|
15457
15473
|
};
|
15458
|
-
const MessageContent = (
|
15459
|
-
const { contents } = props;
|
15474
|
+
const MessageContent = ({ contents }) => {
|
15460
15475
|
if (Array.isArray(contents)) {
|
15461
15476
|
return contents.map((content, index) => {
|
15462
15477
|
if (typeof content === "string") {
|
@@ -19422,21 +19437,15 @@ const LoggerEventView = ({ id, event, style }) => {
|
|
19422
19437
|
};
|
19423
19438
|
const kPrismRenderMaxSize = 25e4;
|
19424
19439
|
const JSONPanel = ({ id, json, data, simple, style }) => {
|
19425
|
-
const sourceCode = json || JSON.stringify(data, void 0, 2);
|
19426
19440
|
const codeRef = A();
|
19427
|
-
|
19441
|
+
const sourceCode = T(() => {
|
19442
|
+
return json || JSON.stringify(data, void 0, 2);
|
19443
|
+
}, [json, data]);
|
19444
|
+
y(() => {
|
19428
19445
|
if (sourceCode.length < kPrismRenderMaxSize) {
|
19429
|
-
|
19430
|
-
sourceCode,
|
19431
|
-
Prism$1.languages.javascript,
|
19432
|
-
"javacript"
|
19433
|
-
);
|
19434
|
-
} else {
|
19435
|
-
const textNode = document.createTextNode(sourceCode);
|
19436
|
-
codeRef.current.innerText = "";
|
19437
|
-
codeRef.current.appendChild(textNode);
|
19446
|
+
Prism$1.highlightElement(codeRef.current);
|
19438
19447
|
}
|
19439
|
-
}
|
19448
|
+
}, [sourceCode]);
|
19440
19449
|
return m$1`<div>
|
19441
19450
|
<pre
|
19442
19451
|
style=${{
|
@@ -19446,16 +19455,18 @@ const JSONPanel = ({ id, json, data, simple, style }) => {
|
|
19446
19455
|
borderRadius: simple ? void 0 : "var(--bs-border-radius)",
|
19447
19456
|
...style
|
19448
19457
|
}}
|
19458
|
+
class="jsonPanel"
|
19449
19459
|
>
|
19450
19460
|
<code
|
19451
19461
|
id=${id}
|
19452
19462
|
ref=${codeRef}
|
19453
|
-
class="sourceCode-
|
19463
|
+
class="sourceCode language-javascript"
|
19454
19464
|
style=${{
|
19455
19465
|
fontSize: FontSize.small,
|
19456
19466
|
whiteSpace: "pre-wrap",
|
19457
19467
|
wordWrap: "anywhere"
|
19458
19468
|
}}>
|
19469
|
+
${sourceCode}
|
19459
19470
|
</code>
|
19460
19471
|
</pre>
|
19461
19472
|
</div>`;
|
@@ -19569,6 +19580,7 @@ const decisionIcon = (decision) => {
|
|
19569
19580
|
}
|
19570
19581
|
};
|
19571
19582
|
const ToolEventView = ({ id, event, style, depth }) => {
|
19583
|
+
var _a2;
|
19572
19584
|
const { input, functionCall, inputType } = resolveToolInput(
|
19573
19585
|
event.function,
|
19574
19586
|
event.arguments
|
@@ -19576,10 +19588,10 @@ const ToolEventView = ({ id, event, style, depth }) => {
|
|
19576
19588
|
const approvalEvent = event.events.find((e2) => {
|
19577
19589
|
return e2.event === "approval";
|
19578
19590
|
});
|
19579
|
-
const title = `Tool: ${event.function}`;
|
19591
|
+
const title = `Tool: ${((_a2 = event.view) == null ? void 0 : _a2.title) || event.function}`;
|
19580
19592
|
return m$1`
|
19581
19593
|
<${EventPanel} id=${id} title="${title}" subTitle=${formatDateTime(new Date(event.timestamp))} icon=${ApplicationIcons.solvers.use_tools} style=${style}>
|
19582
|
-
<div name="Summary" style=${{ margin: "0.5em 0" }}>
|
19594
|
+
<div name="Summary" style=${{ margin: "0.5em 0", width: "100%" }}>
|
19583
19595
|
<${ToolCallView}
|
19584
19596
|
functionCall=${functionCall}
|
19585
19597
|
input=${input}
|
@@ -20267,7 +20279,10 @@ const SampleSummary = ({ parent_id, sample, style, sampleDescriptor }) => {
|
|
20267
20279
|
clamp: true
|
20268
20280
|
});
|
20269
20281
|
}
|
20270
|
-
const fullAnswer = sample && sampleDescriptor ?
|
20282
|
+
const fullAnswer = sample && sampleDescriptor ? (
|
20283
|
+
// @ts-ignore
|
20284
|
+
sampleDescriptor.selectedScorer(sample).answer()
|
20285
|
+
) : void 0;
|
20271
20286
|
if (fullAnswer) {
|
20272
20287
|
columns.push({
|
20273
20288
|
label: "Answer",
|
@@ -20293,7 +20308,11 @@ const SampleSummary = ({ parent_id, sample, style, sampleDescriptor }) => {
|
|
20293
20308
|
value: sample.error ? m$1`<${FlatSampleError}
|
20294
20309
|
message=${sample.error.message}
|
20295
20310
|
style=${{ marginTop: "0.4rem" }}
|
20296
|
-
/>` :
|
20311
|
+
/>` : (
|
20312
|
+
// TODO: Cleanup once the PR lands which makes sample / sample summary share common interface
|
20313
|
+
// @ts-ignore
|
20314
|
+
sampleDescriptor == null ? void 0 : sampleDescriptor.selectedScore(sample).render()
|
20315
|
+
),
|
20297
20316
|
size: "minmax(2em, auto)",
|
20298
20317
|
center: true
|
20299
20318
|
});
|
@@ -20601,7 +20620,7 @@ const SampleList = (props) => {
|
|
20601
20620
|
[selectedIndex]
|
20602
20621
|
);
|
20603
20622
|
const listStyle = { ...style, flex: "1", overflowY: "auto", outline: "none" };
|
20604
|
-
const { limit, answer } = gridColumns(sampleDescriptor);
|
20623
|
+
const { limit, answer, target } = gridColumns(sampleDescriptor);
|
20605
20624
|
const headerRow = m$1`<div
|
20606
20625
|
style=${{
|
20607
20626
|
display: "grid",
|
@@ -20616,7 +20635,7 @@ const SampleList = (props) => {
|
|
20616
20635
|
>
|
20617
20636
|
<div>Id</div>
|
20618
20637
|
<div>Input</div>
|
20619
|
-
<div
|
20638
|
+
<div>${target !== "0" ? "Target" : ""}</div>
|
20620
20639
|
<div>${answer !== "0" ? "Answer" : ""}</div>
|
20621
20640
|
<div>${limit !== "0" ? "Limit" : ""}</div>
|
20622
20641
|
<div style=${{ justifySelf: "center" }}>Score</div>
|
@@ -24620,7 +24639,9 @@ const ResultsPanel = ({ results }) => {
|
|
24620
24639
|
flexWrap: "wrap",
|
24621
24640
|
justifyContent: "end",
|
24622
24641
|
height: "100%",
|
24623
|
-
alignItems: "center"
|
24642
|
+
alignItems: "center",
|
24643
|
+
maxHeight: "15em",
|
24644
|
+
overflow: "scroll"
|
24624
24645
|
}}
|
24625
24646
|
>
|
24626
24647
|
${metrics.map((metric, i) => {
|
@@ -24638,7 +24659,9 @@ const ResultsPanel = ({ results }) => {
|
|
24638
24659
|
alignItems: "center",
|
24639
24660
|
marginTop: "0.2rem",
|
24640
24661
|
paddingBottom: "0.4rem",
|
24641
|
-
rowGap: "1em"
|
24662
|
+
rowGap: "1em",
|
24663
|
+
maxHeight: "15em",
|
24664
|
+
overflow: "scroll"
|
24642
24665
|
}}
|
24643
24666
|
>
|
24644
24667
|
${(_b2 = results == null ? void 0 : results.scores) == null ? void 0 : _b2.map((score, index) => {
|
@@ -26223,8 +26246,12 @@ function App({
|
|
26223
26246
|
}
|
26224
26247
|
}
|
26225
26248
|
} catch (e2) {
|
26226
|
-
|
26227
|
-
|
26249
|
+
if (e2.message === "Load failed" || e2.message === "Failed to fetch") {
|
26250
|
+
setStatus({ loading: false });
|
26251
|
+
} else {
|
26252
|
+
console.log(e2);
|
26253
|
+
setStatus({ loading: false, error: e2 });
|
26254
|
+
}
|
26228
26255
|
}
|
26229
26256
|
setHeadersLoading(false);
|
26230
26257
|
};
|
@@ -26452,8 +26479,11 @@ function App({
|
|
26452
26479
|
}
|
26453
26480
|
new ClipboardJS(".clipboard-button,.copy-button");
|
26454
26481
|
if (pollForLogs) {
|
26455
|
-
|
26456
|
-
|
26482
|
+
let retryDelay = 1e3;
|
26483
|
+
const maxRetryDelay = 6e4;
|
26484
|
+
const pollEvents = async () => {
|
26485
|
+
try {
|
26486
|
+
const events = await api2.client_events();
|
26457
26487
|
if (events.includes("reload")) {
|
26458
26488
|
window.location.reload();
|
26459
26489
|
}
|
@@ -26462,8 +26492,15 @@ function App({
|
|
26462
26492
|
setLogs(logs2);
|
26463
26493
|
setSelectedLogIndex(0);
|
26464
26494
|
}
|
26465
|
-
|
26466
|
-
|
26495
|
+
retryDelay = 1e3;
|
26496
|
+
} catch (error2) {
|
26497
|
+
console.error("Error fetching client events:", error2);
|
26498
|
+
retryDelay = Math.min(retryDelay * 2, maxRetryDelay);
|
26499
|
+
} finally {
|
26500
|
+
setTimeout(pollEvents, retryDelay);
|
26501
|
+
}
|
26502
|
+
};
|
26503
|
+
pollEvents();
|
26467
26504
|
}
|
26468
26505
|
};
|
26469
26506
|
loadLogsAndState();
|
inspect_ai/_view/www/src/App.mjs
CHANGED
@@ -480,8 +480,13 @@ export function App({
|
|
480
480
|
}
|
481
481
|
}
|
482
482
|
} catch (e) {
|
483
|
-
|
484
|
-
|
483
|
+
if (e.message === "Load failed" || e.message === "Failed to fetch") {
|
484
|
+
// This will happen if the server disappears (e.g. inspect view is terminated)
|
485
|
+
setStatus({ loading: false });
|
486
|
+
} else {
|
487
|
+
console.log(e);
|
488
|
+
setStatus({ loading: false, error: e });
|
489
|
+
}
|
485
490
|
}
|
486
491
|
|
487
492
|
setHeadersLoading(false);
|
@@ -774,18 +779,38 @@ export function App({
|
|
774
779
|
new ClipboardJS(".clipboard-button,.copy-button");
|
775
780
|
|
776
781
|
if (pollForLogs) {
|
777
|
-
|
778
|
-
|
782
|
+
let retryDelay = 1000;
|
783
|
+
const maxRetryDelay = 60000;
|
784
|
+
|
785
|
+
const pollEvents = async () => {
|
786
|
+
try {
|
787
|
+
const events = await api.client_events();
|
788
|
+
|
779
789
|
if (events.includes("reload")) {
|
780
790
|
window.location.reload();
|
781
791
|
}
|
792
|
+
|
782
793
|
if (events.includes("refresh-evals")) {
|
783
794
|
const logs = await load();
|
784
795
|
setLogs(logs);
|
785
796
|
setSelectedLogIndex(0);
|
786
797
|
}
|
787
|
-
|
788
|
-
|
798
|
+
|
799
|
+
// Reset delay after a successful call
|
800
|
+
retryDelay = 1000;
|
801
|
+
} catch (error) {
|
802
|
+
console.error("Error fetching client events:", error);
|
803
|
+
|
804
|
+
// Exponential backoff with capping
|
805
|
+
retryDelay = Math.min(retryDelay * 2, maxRetryDelay);
|
806
|
+
} finally {
|
807
|
+
// Schedule the next poll
|
808
|
+
setTimeout(pollEvents, retryDelay);
|
809
|
+
}
|
810
|
+
};
|
811
|
+
|
812
|
+
// Start polling
|
813
|
+
pollEvents();
|
789
814
|
}
|
790
815
|
};
|
791
816
|
|
@@ -4,7 +4,7 @@ import Prism from "prismjs";
|
|
4
4
|
import "prismjs/components/prism-json";
|
5
5
|
|
6
6
|
import { html } from "htm/preact";
|
7
|
-
import { useRef } from "preact/hooks";
|
7
|
+
import { useEffect, useMemo, useRef } from "preact/hooks";
|
8
8
|
import { FontSize } from "../appearance/Fonts.mjs";
|
9
9
|
|
10
10
|
const kPrismRenderMaxSize = 250000;
|
@@ -22,25 +22,17 @@ const kPrismRenderMaxSize = 250000;
|
|
22
22
|
* @returns {import('preact').JSX.Element} The rendered component.
|
23
23
|
*/
|
24
24
|
export const JSONPanel = ({ id, json, data, simple, style }) => {
|
25
|
-
const sourceCode = json || JSON.stringify(data, undefined, 2);
|
26
25
|
const codeRef = useRef();
|
27
26
|
|
28
|
-
|
27
|
+
const sourceCode = useMemo(() => {
|
28
|
+
return json || JSON.stringify(data, undefined, 2);
|
29
|
+
}, [json, data]);
|
30
|
+
|
31
|
+
useEffect(() => {
|
29
32
|
if (sourceCode.length < kPrismRenderMaxSize) {
|
30
|
-
|
31
|
-
codeRef.current.innerHTML = Prism.highlight(
|
32
|
-
sourceCode,
|
33
|
-
Prism.languages.javascript,
|
34
|
-
"javacript",
|
35
|
-
);
|
36
|
-
} else {
|
37
|
-
const textNode = document.createTextNode(sourceCode);
|
38
|
-
// @ts-ignore
|
39
|
-
codeRef.current.innerText = "";
|
40
|
-
// @ts-ignore
|
41
|
-
codeRef.current.appendChild(textNode);
|
33
|
+
Prism.highlightElement(codeRef.current);
|
42
34
|
}
|
43
|
-
}
|
35
|
+
}, [sourceCode]);
|
44
36
|
|
45
37
|
return html`<div>
|
46
38
|
<pre
|
@@ -51,16 +43,18 @@ export const JSONPanel = ({ id, json, data, simple, style }) => {
|
|
51
43
|
borderRadius: simple ? undefined : "var(--bs-border-radius)",
|
52
44
|
...style,
|
53
45
|
}}
|
46
|
+
class="jsonPanel"
|
54
47
|
>
|
55
48
|
<code
|
56
49
|
id=${id}
|
57
50
|
ref=${codeRef}
|
58
|
-
class="sourceCode-
|
51
|
+
class="sourceCode language-javascript"
|
59
52
|
style=${{
|
60
53
|
fontSize: FontSize.small,
|
61
54
|
whiteSpace: "pre-wrap",
|
62
55
|
wordWrap: "anywhere",
|
63
56
|
}}>
|
57
|
+
${sourceCode}
|
64
58
|
</code>
|
65
59
|
</pre>
|
66
60
|
</div>`;
|
@@ -2,8 +2,15 @@ import { html } from "htm/preact";
|
|
2
2
|
import { MarkdownDiv } from "./MarkdownDiv.mjs";
|
3
3
|
import { ToolOutput } from "./Tools.mjs";
|
4
4
|
|
5
|
-
|
6
|
-
|
5
|
+
/**
|
6
|
+
* Renders message content based on its type.
|
7
|
+
* Supports rendering strings, images, and tools using specific renderers.
|
8
|
+
*
|
9
|
+
* @param {Object} props - The props object.
|
10
|
+
* @param {string|string[]| (import("../types/log").ContentText | import("../types/log").ContentImage | import("../Types.mjs").ContentTool)[]} props.contents - The content or array of contents to render.
|
11
|
+
* @returns {import("preact").JSX.Element | import("preact").JSX.Element[]} The component.
|
12
|
+
*/
|
13
|
+
export const MessageContent = ({ contents }) => {
|
7
14
|
if (Array.isArray(contents)) {
|
8
15
|
return contents.map((content, index) => {
|
9
16
|
if (typeof content === "string") {
|