inspect-ai 0.3.103__py3-none-any.whl → 0.3.105__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/common.py +2 -1
- inspect_ai/_cli/eval.py +2 -2
- inspect_ai/_display/core/active.py +3 -0
- inspect_ai/_display/core/config.py +1 -0
- inspect_ai/_display/core/panel.py +21 -13
- inspect_ai/_display/core/results.py +3 -7
- inspect_ai/_display/core/rich.py +3 -5
- inspect_ai/_display/log/__init__.py +0 -0
- inspect_ai/_display/log/display.py +173 -0
- inspect_ai/_display/plain/display.py +2 -2
- inspect_ai/_display/rich/display.py +2 -4
- inspect_ai/_display/textual/app.py +1 -6
- inspect_ai/_display/textual/widgets/task_detail.py +3 -14
- inspect_ai/_display/textual/widgets/tasks.py +1 -1
- inspect_ai/_eval/eval.py +1 -1
- inspect_ai/_eval/evalset.py +3 -3
- inspect_ai/_eval/registry.py +6 -1
- inspect_ai/_eval/run.py +5 -1
- inspect_ai/_eval/task/constants.py +1 -0
- inspect_ai/_eval/task/log.py +2 -0
- inspect_ai/_eval/task/run.py +65 -39
- inspect_ai/_util/citation.py +88 -0
- inspect_ai/_util/content.py +24 -2
- inspect_ai/_util/json.py +17 -2
- inspect_ai/_util/registry.py +19 -4
- inspect_ai/_view/schema.py +0 -6
- inspect_ai/_view/server.py +17 -0
- inspect_ai/_view/www/dist/assets/index.css +93 -31
- inspect_ai/_view/www/dist/assets/index.js +10639 -10011
- inspect_ai/_view/www/log-schema.json +418 -1
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
- inspect_ai/_view/www/node_modules/katex/src/fonts/generate_fonts.py +58 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/extract_tfms.py +114 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/extract_ttfs.py +122 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/format_json.py +28 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/parse_tfm.py +211 -0
- inspect_ai/_view/www/package.json +2 -2
- inspect_ai/_view/www/src/@types/log.d.ts +140 -39
- inspect_ai/_view/www/src/app/content/RecordTree.tsx +13 -0
- inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -1
- inspect_ai/_view/www/src/app/routing/logNavigation.ts +31 -0
- inspect_ai/_view/www/src/app/routing/{navigationHooks.ts → sampleNavigation.ts} +39 -86
- inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/chat/ChatMessage.module.css +4 -0
- inspect_ai/_view/www/src/app/samples/chat/ChatMessage.tsx +17 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageCitations.module.css +16 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageCitations.tsx +63 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +6 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +174 -25
- inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +21 -3
- inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.module.css +7 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.tsx +111 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.module.css +10 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.tsx +14 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.module.css +19 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.tsx +49 -0
- inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -1
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +12 -2
- inspect_ai/_view/www/src/app/samples/chat/types.ts +4 -0
- inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/sample-tools/filters.ts +26 -0
- inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/SampleFilter.tsx +14 -3
- inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/completions.ts +359 -7
- inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/language.ts +6 -0
- inspect_ai/_view/www/src/app/samples/sampleLimit.ts +2 -2
- inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +4 -4
- inspect_ai/_view/www/src/app/samples/transcript/outline/OutlineRow.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +1 -1
- inspect_ai/_view/www/src/client/api/api-browser.ts +25 -0
- inspect_ai/_view/www/src/client/api/api-http.ts +3 -0
- inspect_ai/_view/www/src/client/api/api-vscode.ts +6 -0
- inspect_ai/_view/www/src/client/api/client-api.ts +3 -0
- inspect_ai/_view/www/src/client/api/jsonrpc.ts +1 -0
- inspect_ai/_view/www/src/client/api/types.ts +3 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +15 -2
- inspect_ai/_view/www/src/state/samplePolling.ts +17 -1
- inspect_ai/_view/www/src/tests/README.md +2 -2
- inspect_ai/_view/www/src/utils/git.ts +3 -1
- inspect_ai/_view/www/src/utils/html.ts +6 -0
- inspect_ai/agent/_handoff.py +8 -5
- inspect_ai/agent/_react.py +5 -5
- inspect_ai/dataset/_dataset.py +1 -1
- inspect_ai/log/_condense.py +5 -0
- inspect_ai/log/_file.py +4 -1
- inspect_ai/log/_log.py +9 -4
- inspect_ai/log/_recorders/json.py +4 -2
- inspect_ai/log/_samples.py +5 -0
- inspect_ai/log/_util.py +2 -0
- inspect_ai/model/__init__.py +14 -0
- inspect_ai/model/_call_tools.py +17 -8
- inspect_ai/model/_chat_message.py +3 -0
- inspect_ai/model/_openai_responses.py +80 -34
- inspect_ai/model/_providers/_anthropic_citations.py +158 -0
- inspect_ai/model/_providers/_google_citations.py +100 -0
- inspect_ai/model/_providers/anthropic.py +219 -36
- inspect_ai/model/_providers/google.py +98 -22
- inspect_ai/model/_providers/mistral.py +20 -7
- inspect_ai/model/_providers/openai.py +11 -10
- inspect_ai/model/_providers/openai_compatible.py +3 -2
- inspect_ai/model/_providers/openai_responses.py +2 -5
- inspect_ai/model/_providers/perplexity.py +123 -0
- inspect_ai/model/_providers/providers.py +13 -2
- inspect_ai/model/_providers/vertex.py +3 -0
- inspect_ai/model/_trim.py +5 -0
- inspect_ai/tool/__init__.py +14 -0
- inspect_ai/tool/_mcp/_mcp.py +5 -2
- inspect_ai/tool/_mcp/sampling.py +19 -3
- inspect_ai/tool/_mcp/server.py +1 -1
- inspect_ai/tool/_tool.py +10 -1
- inspect_ai/tool/_tools/_web_search/_base_http_provider.py +104 -0
- inspect_ai/tool/_tools/_web_search/_exa.py +78 -0
- inspect_ai/tool/_tools/_web_search/_google.py +22 -25
- inspect_ai/tool/_tools/_web_search/_tavily.py +47 -65
- inspect_ai/tool/_tools/_web_search/_web_search.py +83 -36
- inspect_ai/tool/_tools/_web_search/_web_search_provider.py +7 -0
- inspect_ai/util/__init__.py +8 -0
- inspect_ai/util/_background.py +64 -0
- inspect_ai/util/_display.py +11 -2
- inspect_ai/util/_limit.py +72 -5
- inspect_ai/util/_sandbox/__init__.py +2 -0
- inspect_ai/util/_sandbox/docker/compose.py +2 -2
- inspect_ai/util/_sandbox/service.py +28 -7
- inspect_ai/util/_span.py +12 -1
- inspect_ai/util/_subprocess.py +51 -38
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/RECORD +134 -109
- /inspect_ai/model/{_openai_computer_use.py → _providers/_openai_computer_use.py} +0 -0
- /inspect_ai/model/{_openai_web_search.py → _providers/_openai_web_search.py} +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/top_level.txt +0 -0
inspect_ai/_eval/task/run.py
CHANGED
@@ -10,6 +10,7 @@ from pathlib import PurePath
|
|
10
10
|
from typing import Callable, Literal
|
11
11
|
|
12
12
|
import anyio
|
13
|
+
from anyio.abc import TaskGroup
|
13
14
|
from typing_extensions import Unpack
|
14
15
|
|
15
16
|
from inspect_ai._display import (
|
@@ -223,7 +224,7 @@ async def task_run(options: TaskRunOptions) -> EvalLog:
|
|
223
224
|
samples=len(samples),
|
224
225
|
steps=len(samples) * SAMPLE_TOTAL_PROGRESS_UNITS,
|
225
226
|
eval_config=config,
|
226
|
-
task_args=logger.eval.task_args,
|
227
|
+
task_args=logger.eval.task_args_passed,
|
227
228
|
generate_config=generate_config,
|
228
229
|
tags=tags,
|
229
230
|
log_location=log_location,
|
@@ -306,37 +307,57 @@ async def task_run(options: TaskRunOptions) -> EvalLog:
|
|
306
307
|
task.metrics,
|
307
308
|
)
|
308
309
|
|
310
|
+
async def run_sample(
|
311
|
+
sample: Sample, state: TaskState
|
312
|
+
) -> dict[str, SampleScore] | None:
|
313
|
+
result: dict[str, SampleScore] | None = None
|
314
|
+
|
315
|
+
async def run(tg: TaskGroup) -> None:
|
316
|
+
try:
|
317
|
+
nonlocal result
|
318
|
+
result = await task_run_sample(
|
319
|
+
tg=tg,
|
320
|
+
task_name=task.name,
|
321
|
+
log_location=profile.log_location,
|
322
|
+
sample=sample,
|
323
|
+
state=state,
|
324
|
+
sandbox=sandbox,
|
325
|
+
max_sandboxes=config.max_sandboxes,
|
326
|
+
sandbox_cleanup=sandbox_cleanup,
|
327
|
+
plan=plan,
|
328
|
+
scorers=scorers,
|
329
|
+
generate=generate,
|
330
|
+
progress=progress,
|
331
|
+
logger=logger if log_samples else None,
|
332
|
+
log_images=log_images,
|
333
|
+
sample_source=sample_source,
|
334
|
+
sample_error=sample_error_handler,
|
335
|
+
sample_complete=sample_complete,
|
336
|
+
fails_on_error=(
|
337
|
+
config.fail_on_error is None
|
338
|
+
or config.fail_on_error is True
|
339
|
+
),
|
340
|
+
retry_on_error=config.retry_on_error or 0,
|
341
|
+
error_retries=[],
|
342
|
+
time_limit=config.time_limit,
|
343
|
+
working_limit=config.working_limit,
|
344
|
+
semaphore=sample_semaphore,
|
345
|
+
)
|
346
|
+
finally:
|
347
|
+
tg.cancel_scope.cancel()
|
348
|
+
|
349
|
+
async with anyio.create_task_group() as tg:
|
350
|
+
tg.start_soon(run, tg)
|
351
|
+
|
352
|
+
return result
|
353
|
+
|
309
354
|
sample_results = await tg_collect(
|
310
355
|
[
|
311
|
-
functools.partial(
|
312
|
-
task_run_sample,
|
313
|
-
task_name=task.name,
|
314
|
-
log_location=profile.log_location,
|
315
|
-
sample=sample,
|
316
|
-
state=state,
|
317
|
-
sandbox=sandbox,
|
318
|
-
max_sandboxes=config.max_sandboxes,
|
319
|
-
sandbox_cleanup=sandbox_cleanup,
|
320
|
-
plan=plan,
|
321
|
-
scorers=scorers,
|
322
|
-
generate=generate,
|
323
|
-
progress=progress,
|
324
|
-
logger=logger if log_samples else None,
|
325
|
-
log_images=log_images,
|
326
|
-
sample_source=sample_source,
|
327
|
-
sample_error=sample_error_handler,
|
328
|
-
sample_complete=sample_complete,
|
329
|
-
fails_on_error=(
|
330
|
-
config.fail_on_error is None
|
331
|
-
or config.fail_on_error is True
|
332
|
-
),
|
333
|
-
retry_on_error=config.retry_on_error or 0,
|
334
|
-
error_retries=[],
|
335
|
-
time_limit=config.time_limit,
|
336
|
-
working_limit=config.working_limit,
|
337
|
-
semaphore=sample_semaphore,
|
356
|
+
functools.partial(run_sample, sample, state)
|
357
|
+
for (sample, state) in zip(
|
358
|
+
samples,
|
359
|
+
states,
|
338
360
|
)
|
339
|
-
for (sample, state) in zip(samples, states)
|
340
361
|
]
|
341
362
|
)
|
342
363
|
|
@@ -492,6 +513,7 @@ def update_metrics_display_fn(
|
|
492
513
|
|
493
514
|
async def task_run_sample(
|
494
515
|
*,
|
516
|
+
tg: TaskGroup,
|
495
517
|
task_name: str,
|
496
518
|
log_location: str,
|
497
519
|
sample: Sample,
|
@@ -611,12 +633,14 @@ async def task_run_sample(
|
|
611
633
|
working_limit=working_limit,
|
612
634
|
fails_on_error=fails_on_error or (retry_on_error > 0),
|
613
635
|
transcript=sample_transcript,
|
636
|
+
tg=tg,
|
614
637
|
) as active,
|
615
638
|
):
|
616
639
|
start_time: float | None = None
|
617
640
|
error: EvalError | None = None
|
618
641
|
raise_error: BaseException | None = None
|
619
642
|
results: dict[str, SampleScore] = {}
|
643
|
+
limit: EvalSampleLimit | None = None
|
620
644
|
try:
|
621
645
|
# begin init
|
622
646
|
init_span = span("init", type="init")
|
@@ -704,9 +728,17 @@ async def task_run_sample(
|
|
704
728
|
# handle the cancel exception
|
705
729
|
raise
|
706
730
|
|
707
|
-
except LimitExceededError:
|
731
|
+
except LimitExceededError as ex:
|
732
|
+
# capture most recent state for scoring
|
733
|
+
state = sample_state() or state
|
734
|
+
limit = EvalSampleLimit(
|
735
|
+
type=ex.type, limit=ex.limit if ex.limit is not None else -1
|
736
|
+
)
|
737
|
+
|
738
|
+
except TerminateSampleError:
|
708
739
|
# capture most recent state for scoring
|
709
740
|
state = sample_state() or state
|
741
|
+
limit = EvalSampleLimit(type="operator", limit=1)
|
710
742
|
|
711
743
|
except BaseException as ex:
|
712
744
|
error, raise_error = handle_error(ex)
|
@@ -815,6 +847,7 @@ async def task_run_sample(
|
|
815
847
|
state=state,
|
816
848
|
scores=results,
|
817
849
|
error=error,
|
850
|
+
limit=limit,
|
818
851
|
error_retries=error_retries,
|
819
852
|
log_images=log_images,
|
820
853
|
)
|
@@ -854,6 +887,7 @@ async def task_run_sample(
|
|
854
887
|
time_limit=time_limit,
|
855
888
|
working_limit=working_limit,
|
856
889
|
semaphore=semaphore,
|
890
|
+
tg=tg,
|
857
891
|
)
|
858
892
|
|
859
893
|
# no error
|
@@ -879,6 +913,7 @@ async def log_sample(
|
|
879
913
|
state: TaskState,
|
880
914
|
scores: dict[str, SampleScore],
|
881
915
|
error: EvalError | None,
|
916
|
+
limit: EvalSampleLimit | None,
|
882
917
|
error_retries: list[EvalError],
|
883
918
|
log_images: bool,
|
884
919
|
) -> None:
|
@@ -894,15 +929,6 @@ async def log_sample(
|
|
894
929
|
# compute total time if we can
|
895
930
|
total_time = time.monotonic() - start_time if start_time is not None else None
|
896
931
|
|
897
|
-
# if a limit was hit, note that in the Eval Sample
|
898
|
-
limit = None
|
899
|
-
for e in transcript().events:
|
900
|
-
if e.event == "sample_limit":
|
901
|
-
limit = EvalSampleLimit(
|
902
|
-
type=e.type, limit=e.limit if e.limit is not None else -1
|
903
|
-
)
|
904
|
-
break
|
905
|
-
|
906
932
|
eval_sample = EvalSample(
|
907
933
|
id=id,
|
908
934
|
epoch=state.epoch,
|
@@ -0,0 +1,88 @@
|
|
1
|
+
from typing import Annotated, Literal, TypeAlias, Union
|
2
|
+
|
3
|
+
from pydantic import BaseModel, Discriminator, Field, JsonValue
|
4
|
+
|
5
|
+
|
6
|
+
class CitationBase(BaseModel):
|
7
|
+
"""Base class for citations."""
|
8
|
+
|
9
|
+
cited_text: str | tuple[int, int] | None = Field(
|
10
|
+
default=None,
|
11
|
+
# without helping the schema generator, this will turn into [unknown, unknown] in TypeScript
|
12
|
+
json_schema_extra={
|
13
|
+
"anyOf": [
|
14
|
+
{"type": "string"},
|
15
|
+
{
|
16
|
+
"type": "array",
|
17
|
+
"items": [{"type": "integer"}, {"type": "integer"}],
|
18
|
+
"additionalItems": False,
|
19
|
+
"minItems": 2,
|
20
|
+
"maxItems": 2,
|
21
|
+
},
|
22
|
+
{"type": "null"},
|
23
|
+
]
|
24
|
+
},
|
25
|
+
)
|
26
|
+
"""
|
27
|
+
The cited text
|
28
|
+
|
29
|
+
This can be the text itself or a start/end range of the text content within
|
30
|
+
the container that is the cited text.
|
31
|
+
"""
|
32
|
+
|
33
|
+
title: str | None = None
|
34
|
+
"""Title of the cited resource."""
|
35
|
+
|
36
|
+
internal: dict[str, JsonValue] | None = Field(default=None)
|
37
|
+
"""Model provider specific payload - typically used to aid transformation back to model types."""
|
38
|
+
|
39
|
+
|
40
|
+
class ContentCitation(CitationBase):
|
41
|
+
"""A generic content citation."""
|
42
|
+
|
43
|
+
type: Literal["content"] = Field(default="content")
|
44
|
+
"""Type."""
|
45
|
+
|
46
|
+
|
47
|
+
class DocumentRange(BaseModel):
|
48
|
+
"""A range specifying a section of a document."""
|
49
|
+
|
50
|
+
type: Literal["block", "page", "char"]
|
51
|
+
"""The type of the document section specified by the range."""
|
52
|
+
|
53
|
+
start_index: int
|
54
|
+
"""0 based index of the start of the range."""
|
55
|
+
|
56
|
+
end_index: int
|
57
|
+
"""0 based index of the end of the range."""
|
58
|
+
|
59
|
+
|
60
|
+
class DocumentCitation(CitationBase):
|
61
|
+
"""A citation that refers to a page range in a document."""
|
62
|
+
|
63
|
+
type: Literal["document"] = Field(default="document")
|
64
|
+
"""Type."""
|
65
|
+
|
66
|
+
range: DocumentRange | None = Field(default=None)
|
67
|
+
"""Range of the document that is cited."""
|
68
|
+
|
69
|
+
|
70
|
+
class UrlCitation(CitationBase):
|
71
|
+
"""A citation that refers to a URL."""
|
72
|
+
|
73
|
+
type: Literal["url"] = Field(default="url")
|
74
|
+
"""Type."""
|
75
|
+
|
76
|
+
url: str
|
77
|
+
"""URL of the cited resource."""
|
78
|
+
|
79
|
+
|
80
|
+
Citation: TypeAlias = Annotated[
|
81
|
+
Union[
|
82
|
+
ContentCitation,
|
83
|
+
DocumentCitation,
|
84
|
+
UrlCitation,
|
85
|
+
],
|
86
|
+
Discriminator("type"),
|
87
|
+
]
|
88
|
+
"""A citation sent to or received from a model."""
|
inspect_ai/_util/content.py
CHANGED
@@ -1,7 +1,9 @@
|
|
1
|
-
from typing import Literal, Union
|
1
|
+
from typing import Literal, Sequence, Union
|
2
2
|
|
3
3
|
from pydantic import BaseModel, Field, JsonValue
|
4
4
|
|
5
|
+
from inspect_ai._util.citation import Citation
|
6
|
+
|
5
7
|
|
6
8
|
class ContentBase(BaseModel):
|
7
9
|
internal: JsonValue | None = Field(default=None)
|
@@ -20,6 +22,9 @@ class ContentText(ContentBase):
|
|
20
22
|
refusal: bool | None = Field(default=None)
|
21
23
|
"""Was this a refusal message?"""
|
22
24
|
|
25
|
+
citations: Sequence[Citation] | None = Field(default=None)
|
26
|
+
"""Citations supporting the text block."""
|
27
|
+
|
23
28
|
|
24
29
|
class ContentReasoning(ContentBase):
|
25
30
|
"""Reasoning content.
|
@@ -82,5 +87,22 @@ class ContentVideo(ContentBase):
|
|
82
87
|
"""Format of video data ('mp4', 'mpeg', or 'mov')"""
|
83
88
|
|
84
89
|
|
85
|
-
Content = Union[ContentText, ContentReasoning, ContentImage, ContentAudio, ContentVideo]
|
90
|
+
class ContentData(ContentBase):
|
91
|
+
"""Model internal."""
|
92
|
+
|
93
|
+
type: Literal["data"] = Field(default="data")
|
94
|
+
"""Type."""
|
95
|
+
|
96
|
+
data: dict[str, JsonValue]
|
97
|
+
"""Model provider specific payload - required for internal content."""
|
98
|
+
|
99
|
+
|
100
|
+
Content = Union[
|
101
|
+
ContentText,
|
102
|
+
ContentReasoning,
|
103
|
+
ContentImage,
|
104
|
+
ContentAudio,
|
105
|
+
ContentVideo,
|
106
|
+
ContentData,
|
107
|
+
]
|
86
108
|
"""Content sent to or received from a model."""
|
inspect_ai/_util/json.py
CHANGED
@@ -6,7 +6,7 @@ from typing import (
|
|
6
6
|
|
7
7
|
import jsonpatch
|
8
8
|
from pydantic import BaseModel, Field, JsonValue
|
9
|
-
from pydantic_core import to_json, to_jsonable_python
|
9
|
+
from pydantic_core import PydanticSerializationError, to_json, to_jsonable_python
|
10
10
|
|
11
11
|
JSONType = Literal["string", "integer", "number", "boolean", "array", "object", "null"]
|
12
12
|
"""Valid types within JSON schema."""
|
@@ -27,7 +27,22 @@ def jsonable_dict(x: Any) -> dict[str, JsonValue]:
|
|
27
27
|
|
28
28
|
|
29
29
|
def to_json_safe(x: Any) -> bytes:
|
30
|
-
return to_json(value=x, indent=2, exclude_none=True, fallback=lambda _x: None)
|
30
|
+
def clean_utf8_json(obj: Any) -> Any:
|
31
|
+
if isinstance(obj, str):
|
32
|
+
return obj.encode("utf-8", errors="replace").decode("utf-8")
|
33
|
+
elif isinstance(obj, dict):
|
34
|
+
return {k: clean_utf8_json(v) for k, v in obj.items()}
|
35
|
+
elif isinstance(obj, list):
|
36
|
+
return [clean_utf8_json(item) for item in obj]
|
37
|
+
return obj
|
38
|
+
|
39
|
+
try:
|
40
|
+
return to_json(value=x, indent=2, exclude_none=True, fallback=lambda _x: None)
|
41
|
+
except PydanticSerializationError as ex:
|
42
|
+
if "surrogates not allowed" in str(ex):
|
43
|
+
cleaned = clean_utf8_json(x)
|
44
|
+
return to_json(cleaned)
|
45
|
+
raise
|
31
46
|
|
32
47
|
|
33
48
|
def to_json_str_safe(x: Any) -> str:
|
inspect_ai/_util/registry.py
CHANGED
@@ -102,9 +102,26 @@ def registry_tag(
|
|
102
102
|
*args (list[Any]): Creation arguments
|
103
103
|
**kwargs (dict[str,Any]): Creation keyword arguments
|
104
104
|
"""
|
105
|
+
# bind arguments to params
|
106
|
+
named_params = extract_named_params(type, False, *args, **kwargs)
|
107
|
+
|
108
|
+
# set attribute
|
109
|
+
setattr(o, REGISTRY_INFO, info)
|
110
|
+
setattr(o, REGISTRY_PARAMS, named_params)
|
111
|
+
|
112
|
+
|
113
|
+
def extract_named_params(
|
114
|
+
type: Callable[..., Any], apply_defaults: bool, *args: Any, **kwargs: Any
|
115
|
+
) -> dict[str, Any]:
|
105
116
|
# bind arguments to params
|
106
117
|
named_params: dict[str, Any] = {}
|
107
|
-
bound_params = inspect.signature(type).bind(*args, **kwargs)
|
118
|
+
|
119
|
+
if apply_defaults:
|
120
|
+
bound_params = inspect.signature(type).bind_partial(*args, **kwargs)
|
121
|
+
bound_params.apply_defaults()
|
122
|
+
else:
|
123
|
+
bound_params = inspect.signature(type).bind(*args, **kwargs)
|
124
|
+
|
108
125
|
for param, value in bound_params.arguments.items():
|
109
126
|
named_params[param] = registry_value(value)
|
110
127
|
|
@@ -128,9 +145,7 @@ def registry_tag(
|
|
128
145
|
or "<unknown>"
|
129
146
|
)
|
130
147
|
|
131
|
-
|
132
|
-
setattr(o, REGISTRY_INFO, info)
|
133
|
-
setattr(o, REGISTRY_PARAMS, named_params)
|
148
|
+
return named_params
|
134
149
|
|
135
150
|
|
136
151
|
def registry_name(o: object, name: str) -> str:
|
inspect_ai/_view/schema.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
import json
|
2
2
|
import os
|
3
|
-
import shutil
|
4
3
|
import subprocess
|
5
4
|
from pathlib import Path
|
6
5
|
from typing import Any
|
@@ -19,9 +18,6 @@ def sync_view_schema() -> None:
|
|
19
18
|
# export schema file
|
20
19
|
schema_path = Path(WWW_DIR, "log-schema.json")
|
21
20
|
types_path = Path(WWW_DIR, "src", "@types", "log.d.ts")
|
22
|
-
vs_code_types_path = Path(
|
23
|
-
WWW_DIR, "..", "..", "..", "..", "tools", "vscode", "src", "@types", "log.d.ts"
|
24
|
-
)
|
25
21
|
|
26
22
|
with open(schema_path, "w", encoding="utf-8") as f:
|
27
23
|
# make everything required
|
@@ -50,8 +46,6 @@ def sync_view_schema() -> None:
|
|
50
46
|
|
51
47
|
subprocess.run(["yarn", "prettier:write"], cwd=types_path.parent, check=True)
|
52
48
|
|
53
|
-
shutil.copyfile(types_path, vs_code_types_path)
|
54
|
-
|
55
49
|
|
56
50
|
def schema_to_strict(schema: dict[str, Any]) -> dict[str, Any]:
|
57
51
|
properties = schema.get("properties", None)
|
inspect_ai/_view/server.py
CHANGED
@@ -155,6 +155,23 @@ def view_server(
|
|
155
155
|
body=samples.model_dump_json(), headers={"ETag": samples.etag}
|
156
156
|
)
|
157
157
|
|
158
|
+
@routes.get("/api/log-message")
|
159
|
+
async def api_log_message(request: web.Request) -> web.Response:
|
160
|
+
# log file requested
|
161
|
+
file = query_param_required("log_file", request, str)
|
162
|
+
|
163
|
+
file = urllib.parse.unquote(file)
|
164
|
+
validate_log_file_request(file)
|
165
|
+
|
166
|
+
# message to log
|
167
|
+
message = query_param_required("message", request, str)
|
168
|
+
|
169
|
+
# log the message
|
170
|
+
logger.warning(f"[CLIENT MESSAGE] ({file}): {message}")
|
171
|
+
|
172
|
+
# respond
|
173
|
+
return web.Response(status=204)
|
174
|
+
|
158
175
|
@routes.get("/api/pending-sample-data")
|
159
176
|
async def api_sample_events(request: web.Request) -> web.Response:
|
160
177
|
# log file requested
|
@@ -15655,18 +15655,40 @@ pre[class*="language-"] {
|
|
15655
15655
|
background-color: var(--bs-light-border-subtle);
|
15656
15656
|
margin-top: -1px;
|
15657
15657
|
}
|
15658
|
-
.
|
15658
|
+
._keyPairContainer_1ltuo_1 {
|
15659
|
+
display: grid;
|
15660
|
+
grid-template-columns: max-content auto;
|
15661
|
+
column-gap: 0.5em;
|
15662
|
+
padding-top: 4px;
|
15663
|
+
padding-bottom: 4px;
|
15664
|
+
border-bottom: solid 1px var(--bs-border-color);
|
15665
|
+
}
|
15666
|
+
|
15667
|
+
._key_1ltuo_1 {
|
15668
|
+
display: grid;
|
15669
|
+
grid-template-columns: 1em auto;
|
15670
|
+
cursor: pointer;
|
15671
|
+
}
|
15672
|
+
|
15673
|
+
._pre_1ltuo_16 {
|
15674
|
+
margin-bottom: 0;
|
15675
|
+
}
|
15676
|
+
|
15677
|
+
._treeIcon_1ltuo_20 {
|
15678
|
+
margin-top: -3px;
|
15679
|
+
}
|
15680
|
+
._message_1ivu3_1 {
|
15659
15681
|
font-weight: 300;
|
15660
15682
|
margin-left: 0;
|
15661
15683
|
margin-right: 0;
|
15662
15684
|
white-space: normal;
|
15663
15685
|
}
|
15664
15686
|
|
15665
|
-
.
|
15687
|
+
._systemRole_1ivu3_8 {
|
15666
15688
|
opacity: 0.7;
|
15667
15689
|
}
|
15668
15690
|
|
15669
|
-
.
|
15691
|
+
._messageGrid_1ivu3_12 {
|
15670
15692
|
display: grid;
|
15671
15693
|
grid-template-columns: max-content max-content max-content;
|
15672
15694
|
column-gap: 0.3em;
|
@@ -15674,36 +15696,98 @@ pre[class*="language-"] {
|
|
15674
15696
|
margin-bottom: 0.3em;
|
15675
15697
|
}
|
15676
15698
|
|
15677
|
-
.
|
15699
|
+
._messageContents_1ivu3_20 {
|
15678
15700
|
margin-left: 0;
|
15679
15701
|
padding-bottom: 0;
|
15680
15702
|
}
|
15681
15703
|
|
15682
|
-
.
|
15704
|
+
._messageContents_1ivu3_20._indented_1ivu3_25 {
|
15683
15705
|
margin-left: 0rem;
|
15684
15706
|
}
|
15685
15707
|
|
15686
|
-
.
|
15708
|
+
._copyLink_1ivu3_29 {
|
15687
15709
|
opacity: 0;
|
15688
15710
|
padding-left: 0;
|
15689
15711
|
padding-right: 2em;
|
15690
15712
|
}
|
15691
15713
|
|
15692
|
-
.
|
15714
|
+
._copyLink_1ivu3_29:hover {
|
15693
15715
|
opacity: 1;
|
15694
15716
|
}
|
15695
|
-
|
15717
|
+
|
15718
|
+
._metadataLabel_1ivu3_39 {
|
15719
|
+
padding-top: 1em;
|
15720
|
+
}
|
15721
|
+
._webSearch_1376z_1 {
|
15722
|
+
display: grid;
|
15723
|
+
grid-template-columns: max-content 1fr;
|
15724
|
+
column-gap: 0.5em;
|
15725
|
+
align-items: baseline;
|
15726
|
+
}
|
15727
|
+
|
15728
|
+
._query_1376z_8 {
|
15729
|
+
font-family: var(--bs-font-monospace);
|
15730
|
+
}
|
15731
|
+
._contentData_1sd1z_1 {
|
15732
|
+
border: solid var(--bs-light-border-subtle) 1px;
|
15733
|
+
padding: 0.5em;
|
15734
|
+
margin-bottom: 0.5em;
|
15735
|
+
margin-top: 0.5em;
|
15736
|
+
margin-left: 1em;
|
15737
|
+
}
|
15738
|
+
._webSearch_1mixg_1 {
|
15739
|
+
display: grid;
|
15740
|
+
grid-template-columns: max-content 1fr;
|
15741
|
+
column-gap: 0.5em;
|
15742
|
+
align-items: baseline;
|
15743
|
+
}
|
15744
|
+
|
15745
|
+
._query_1mixg_8 {
|
15746
|
+
font-family: var(--bs-font-monospace);
|
15747
|
+
}
|
15748
|
+
|
15749
|
+
._result_1mixg_12 a:hover {
|
15750
|
+
text-decoration: underline;
|
15751
|
+
}
|
15752
|
+
|
15753
|
+
._result_1mixg_12 a {
|
15754
|
+
opacity: 0.8;
|
15755
|
+
text-decoration: none;
|
15756
|
+
}
|
15757
|
+
._citations_t2k1z_1 {
|
15758
|
+
margin-top: 1em;
|
15759
|
+
margin-bottom: 1em;
|
15760
|
+
display: grid;
|
15761
|
+
grid-template-columns: max-content 1fr;
|
15762
|
+
column-gap: 0.5em;
|
15763
|
+
}
|
15764
|
+
|
15765
|
+
a._citationLink_t2k1z_9 {
|
15766
|
+
display: block;
|
15767
|
+
color: var(--bs-body);
|
15768
|
+
text-decoration: none;
|
15769
|
+
}
|
15770
|
+
a._citationLink_t2k1z_9:hover {
|
15771
|
+
text-decoration: underline;
|
15772
|
+
}
|
15773
|
+
._contentImage_8rgix_1 {
|
15696
15774
|
max-width: 800px;
|
15697
15775
|
border: solid var(--bs-border-color) 1px;
|
15698
15776
|
}
|
15699
15777
|
|
15700
|
-
.
|
15778
|
+
._reasoning_8rgix_6 {
|
15701
15779
|
border: solid var(--bs-light-border-subtle) 1px;
|
15702
15780
|
padding: 1em;
|
15703
15781
|
margin-bottom: 0.5em;
|
15704
15782
|
background-color: var(--bs-light-bg-subtle);
|
15705
15783
|
border-radius: var(--bs-border-radius);
|
15706
15784
|
}
|
15785
|
+
|
15786
|
+
._data_8rgix_14 {
|
15787
|
+
border: solid var(--bs-light-border-subtle) 1px;
|
15788
|
+
padding: 1em;
|
15789
|
+
margin-bottom: 0.5em;
|
15790
|
+
}
|
15707
15791
|
._toolImage_bv5nm_1 {
|
15708
15792
|
max-width: 800px;
|
15709
15793
|
border: solid var(--bs-border-color) 1px;
|
@@ -16741,28 +16825,6 @@ thead th {
|
|
16741
16825
|
.card-body.card-no-padding {
|
16742
16826
|
padding: 0;
|
16743
16827
|
}
|
16744
|
-
._keyPairContainer_1ltuo_1 {
|
16745
|
-
display: grid;
|
16746
|
-
grid-template-columns: max-content auto;
|
16747
|
-
column-gap: 0.5em;
|
16748
|
-
padding-top: 4px;
|
16749
|
-
padding-bottom: 4px;
|
16750
|
-
border-bottom: solid 1px var(--bs-border-color);
|
16751
|
-
}
|
16752
|
-
|
16753
|
-
._key_1ltuo_1 {
|
16754
|
-
display: grid;
|
16755
|
-
grid-template-columns: 1em auto;
|
16756
|
-
cursor: pointer;
|
16757
|
-
}
|
16758
|
-
|
16759
|
-
._pre_1ltuo_16 {
|
16760
|
-
margin-bottom: 0;
|
16761
|
-
}
|
16762
|
-
|
16763
|
-
._treeIcon_1ltuo_20 {
|
16764
|
-
margin-top: -3px;
|
16765
|
-
}
|
16766
16828
|
._item_1uzhd_1 {
|
16767
16829
|
margin-bottom: 0em;
|
16768
16830
|
}
|