inspect-ai 0.3.69__py3-none-any.whl → 0.3.70__py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +13 -1
- inspect_ai/_display/textual/app.py +3 -2
- inspect_ai/_display/textual/widgets/samples.py +4 -10
- inspect_ai/_display/textual/widgets/transcript.py +25 -12
- inspect_ai/_eval/eval.py +14 -2
- inspect_ai/_eval/evalset.py +6 -1
- inspect_ai/_eval/run.py +6 -0
- inspect_ai/_eval/task/run.py +44 -15
- inspect_ai/_eval/task/task.py +26 -3
- inspect_ai/_util/interrupt.py +6 -0
- inspect_ai/_util/logger.py +19 -0
- inspect_ai/_util/rich.py +7 -8
- inspect_ai/_util/text.py +13 -0
- inspect_ai/_util/transcript.py +10 -2
- inspect_ai/_util/working.py +46 -0
- inspect_ai/_view/www/dist/assets/index.css +56 -12
- inspect_ai/_view/www/dist/assets/index.js +904 -750
- inspect_ai/_view/www/log-schema.json +337 -2
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
- inspect_ai/_view/www/node_modules/flatted/python/test.py +63 -0
- inspect_ai/_view/www/src/appearance/icons.ts +3 -1
- inspect_ai/_view/www/src/metadata/RenderedContent.tsx +0 -1
- inspect_ai/_view/www/src/samples/SampleDisplay.module.css +9 -1
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +28 -1
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +4 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +23 -2
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -0
- inspect_ai/_view/www/src/samples/transcript/SandboxEventView.module.css +32 -0
- inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +152 -0
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +9 -2
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +19 -1
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +6 -3
- inspect_ai/_view/www/src/samples/transcript/types.ts +3 -1
- inspect_ai/_view/www/src/types/log.d.ts +188 -108
- inspect_ai/_view/www/src/utils/format.ts +7 -4
- inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +9 -6
- inspect_ai/log/__init__.py +2 -0
- inspect_ai/log/_condense.py +1 -0
- inspect_ai/log/_log.py +72 -12
- inspect_ai/log/_samples.py +5 -1
- inspect_ai/log/_transcript.py +31 -1
- inspect_ai/model/_call_tools.py +1 -1
- inspect_ai/model/_conversation.py +1 -1
- inspect_ai/model/_model.py +32 -16
- inspect_ai/model/_model_call.py +10 -3
- inspect_ai/model/_providers/anthropic.py +13 -2
- inspect_ai/model/_providers/bedrock.py +7 -0
- inspect_ai/model/_providers/cloudflare.py +20 -7
- inspect_ai/model/_providers/google.py +2 -0
- inspect_ai/model/_providers/groq.py +57 -23
- inspect_ai/model/_providers/hf.py +6 -0
- inspect_ai/model/_providers/mistral.py +78 -51
- inspect_ai/model/_providers/openai.py +9 -0
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/model/_providers/util/tracker.py +92 -0
- inspect_ai/model/_providers/vllm.py +13 -5
- inspect_ai/solver/_basic_agent.py +1 -3
- inspect_ai/solver/_bridge/patch.py +0 -2
- inspect_ai/solver/_limit.py +4 -4
- inspect_ai/solver/_plan.py +0 -3
- inspect_ai/solver/_task_state.py +7 -0
- inspect_ai/tool/_tools/_web_search.py +3 -3
- inspect_ai/util/_concurrency.py +14 -8
- inspect_ai/util/_sandbox/context.py +15 -0
- inspect_ai/util/_sandbox/docker/docker.py +7 -5
- inspect_ai/util/_sandbox/environment.py +32 -1
- inspect_ai/util/_sandbox/events.py +149 -0
- inspect_ai/util/_sandbox/local.py +3 -3
- {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.70.dist-info}/METADATA +3 -3
- {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.70.dist-info}/RECORD +74 -67
- {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.70.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.70.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.70.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.70.dist-info}/top_level.txt +0 -0
inspect_ai/_util/rich.py CHANGED
@@ -2,23 +2,22 @@ from rich.console import RenderableType
 from rich.style import Style
 from rich.text import Text
 
+from inspect_ai._util.text import truncate_lines
+
 
 def lines_display(
     text: str, max_lines: int = 100, style: str | Style = ""
 ) -> list[RenderableType]:
-    lines = text
-
-
-
-        ]
+    lines, truncated = truncate_lines(text, max_lines)
+
+    content: list[RenderableType] = [Text(lines, style=style)]
+    if truncated is not None:
         content.append(Text())
         content.append(
             Text.from_markup(
-                f"[italic]Output truncated ({
+                f"[italic]Output truncated ({truncated} additional lines)...[/italic]",
                 style=style,
             )
         )
-    else:
-        content = [Text(text, style=style)]
 
     return content
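With this change `lines_display` delegates truncation to the new `truncate_lines` helper (added in `_util/text.py` below) and always builds the rendered text plus an optional truncation notice. A minimal usage sketch, not part of the package diff, assuming the private `inspect_ai._util.rich` module is imported directly:

# usage sketch only: render a long tool output with the updated lines_display
from rich.console import Console

from inspect_ai._util.rich import lines_display

long_output = "\n".join(f"line {i}" for i in range(250))

console = Console()
for renderable in lines_display(long_output, max_lines=100):
    # prints the first 100 lines, then "Output truncated (150 additional lines)..."
    console.print(renderable)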
inspect_ai/_util/text.py CHANGED
@@ -134,6 +134,19 @@ def truncate(text: str, length: int, overflow: str = "...", pad: bool = True) ->
     return truncated
 
 
+def truncate_lines(
+    text: str, max_lines: int = 100, max_characters: int | None = 100 * 100
+) -> tuple[str, int | None]:
+    if max_characters is not None:
+        text = truncate(text, max_characters)
+    lines = text.splitlines()
+    if len(lines) > max_lines:
+        output = "\n".join(lines[0:max_lines])
+        return output, len(lines) - max_lines
+    else:
+        return text, None
+
+
 def generate_large_text(target_tokens: int) -> str:
     """Generate a large amount of text with approximately the target number of tokens"""
     generated_text = []
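The new `truncate_lines` helper caps the number of lines (and, by default, the total character count via the existing `truncate`) and reports how many lines were dropped. A small behavioral sketch of the code added above; `max_characters=None` is passed here so only the line limit applies:

from inspect_ai._util.text import truncate_lines

text = "\n".join(f"row {i}" for i in range(120))

kept, dropped = truncate_lines(text, max_lines=100, max_characters=None)
assert dropped == 20              # 120 lines, 100 kept, 20 dropped
assert kept.count("\n") == 99     # the first 100 lines, joined back together

kept, dropped = truncate_lines("a\nb", max_lines=100, max_characters=None)
assert (kept, dropped) == ("a\nb", None)  # under the limit: text returned unchanged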
inspect_ai/_util/transcript.py CHANGED
@@ -122,8 +122,16 @@ def transcript_reasoning(reasoning: str) -> list[RenderableType]:
     return content
 
 
-def transcript_separator(
-
+def transcript_separator(
+    title: str, color: str, characters: str = "─"
+) -> RenderableType:
+    return Rule(
+        title=title,
+        characters=characters,
+        style=f"{color} bold",
+        align="center",
+        end="\n\n",
+    )
 
 
 def transcript_function(function: str, arguments: dict[str, Any]) -> RenderableType:
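The separator signature now exposes a `characters` argument, so callers can vary the rule style per transcript event. A usage sketch, not part of the diff:

from rich.console import Console

from inspect_ai._util.transcript import transcript_separator

console = Console()
console.print(transcript_separator("sample 1 (epoch 1)", "blue"))       # default "─" rule
console.print(transcript_separator("sample 1 (epoch 1)", "blue", "·"))  # custom rule characters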
inspect_ai/_util/working.py ADDED
@@ -0,0 +1,46 @@
+import time
+from contextvars import ContextVar
+
+
+def init_sample_working_limit(start_time: float, working_limit: float | None) -> None:
+    _sample_working_limit.set(working_limit)
+    _sample_start_time.set(start_time)
+    _sample_waiting_time.set(0)
+
+
+def sample_waiting_time() -> float:
+    return _sample_waiting_time.get()
+
+
+def report_sample_waiting_time(waiting_time: float) -> None:
+    _sample_waiting_time.set(_sample_waiting_time.get() + waiting_time)
+    check_sample_working_limit()
+
+
+def check_sample_working_limit() -> None:
+    # no check if we don't have a limit
+    working_limit = _sample_working_limit.get()
+    if working_limit is None:
+        return
+
+    # are we over the limit?
+    running_time = time.monotonic() - _sample_start_time.get()
+    working_time = running_time - sample_waiting_time()
+    if working_time > working_limit:
+        from inspect_ai.solver._limit import SampleLimitExceededError
+
+        raise SampleLimitExceededError(
+            type="working",
+            value=int(working_time),
+            limit=int(working_limit),
+            message=f"Exceeded working time limit ({working_limit:,} seconds)",
+        )
+
+
+_sample_working_limit: ContextVar[float | None] = ContextVar(
+    "sample_working_limit", default=None
+)
+
+_sample_start_time: ContextVar[float] = ContextVar("sample_start_time", default=0)
+
+_sample_waiting_time: ContextVar[float] = ContextVar("sample_waiting_time", default=0)
inspect_ai/_view/www/dist/assets/index.css CHANGED
@@ -16317,35 +16317,43 @@ ul.jsondiffpatch-textdiff {
   width: 100%;
   margin-top: 1em;
 }
-.
+._tabPanel_1isha_1 {
   padding-bottom: 1em;
 }
 
-.
+._fullWidth_1isha_5 {
   width: 100%;
 }
 
-.
+._metadataPanel_1isha_9 {
   display: flex;
   flex-wrap: wrap;
-  align-items:
+  align-items: stretch;
   gap: 1em;
   padding-left: 0;
   margin-top: 0.5em;
 }
 
-.
+._padded_1isha_18 {
   padding-left: 0.8em;
   margin-top: 0.4em;
 }
 
-.
+._ansi_1isha_23 {
   margin: 1em 0;
 }
 
-.
+._noTop_1isha_27 {
   margin-top: 0;
 }
+
+._timePanel_1isha_31 {
+  display: grid;
+  grid-template-columns: max-content max-content;
+  grid-template-rows: auto;
+  column-gap: 0.5em;
+  min-width: 200px;
+}
 ._flatBody_gk2ju_1 {
   color: var(--bs-danger);
   display: grid;
@@ -16365,15 +16373,15 @@ ul.jsondiffpatch-textdiff {
   line-height: var(--inspect-font-size-base);
   height: var(--inspect-font-size-base);
 }
-.
+._target_9qy4e_1 {
   padding-left: 0;
 }
 
-.
+._answer_9qy4e_5 {
   padding-left: 0;
 }
 
-.
+._grid_9qy4e_9 {
   display: grid;
   grid-column-gap: 0.5em;
   border-bottom: solid var(--bs-border-color) 1px;
@@ -16381,14 +16389,18 @@ ul.jsondiffpatch-textdiff {
   padding: 0em 1em 1em 1em;
 }
 
-.
+._centerLabel_9qy4e_17 {
   display: flex;
   justify-content: center;
 }
 
-.
+._wrap_9qy4e_22 {
   word-wrap: anywhere;
 }
+
+._titled_9qy4e_26:hover {
+  cursor: pointer;
+}
 ._title_19l1b_1 {
   margin-left: 0.5em;
   display: grid;
@@ -19070,6 +19082,38 @@ span.ap-marker-container:hover span.ap-marker {
   padding: 0;
   margin-bottom: 0;
 }
+._contents_iwnfd_1 {
+  margin-top: 0.5em;
+}
+
+._contents_iwnfd_1 > :last-child {
+  margin-bottom: 0;
+}
+
+._twoColumn_iwnfd_9 {
+  display: grid;
+  grid-template-columns: auto 1fr;
+  column-gap: 1.5em;
+}
+
+._exec_iwnfd_15 {
+  margin-top: 0.5em;
+}
+
+._result_iwnfd_19 {
+  margin-top: 0.5em;
+}
+
+._fileLabel_iwnfd_23 {
+  margin-top: 0;
+  margin-bottom: 0;
+}
+
+._wrapPre_iwnfd_28 {
+  white-space: pre-wrap;
+  word-wrap: break-word;
+  overflow-wrap: break-word;
+}
 ._darkenedBg_c8m1t_1 {
   background-color: var(--bs-light-bg-subtle);
 }