inspect-ai 0.3.104__py3-none-any.whl → 0.3.105__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_eval/evalset.py +1 -1
- inspect_ai/_eval/task/run.py +64 -38
- inspect_ai/_view/server.py +17 -0
- inspect_ai/_view/www/dist/assets/index.css +33 -29
- inspect_ai/_view/www/dist/assets/index.js +559 -247
- inspect_ai/_view/www/src/app/samples/chat/ChatMessage.module.css +4 -0
- inspect_ai/_view/www/src/app/samples/chat/ChatMessage.tsx +17 -0
- inspect_ai/_view/www/src/app/samples/sample-tools/filters.ts +26 -0
- inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/SampleFilter.tsx +14 -3
- inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/completions.ts +359 -7
- inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/language.ts +6 -0
- inspect_ai/_view/www/src/app/samples/transcript/outline/OutlineRow.tsx +1 -1
- inspect_ai/_view/www/src/client/api/api-browser.ts +25 -0
- inspect_ai/_view/www/src/client/api/api-http.ts +3 -0
- inspect_ai/_view/www/src/client/api/api-vscode.ts +6 -0
- inspect_ai/_view/www/src/client/api/client-api.ts +3 -0
- inspect_ai/_view/www/src/client/api/jsonrpc.ts +1 -0
- inspect_ai/_view/www/src/client/api/types.ts +3 -0
- inspect_ai/_view/www/src/state/samplePolling.ts +17 -1
- inspect_ai/agent/_handoff.py +5 -2
- inspect_ai/agent/_react.py +5 -5
- inspect_ai/dataset/_dataset.py +1 -1
- inspect_ai/log/_samples.py +5 -0
- inspect_ai/model/_call_tools.py +4 -4
- inspect_ai/model/_providers/anthropic.py +23 -2
- inspect_ai/model/_providers/google.py +5 -1
- inspect_ai/util/__init__.py +8 -0
- inspect_ai/util/_background.py +64 -0
- inspect_ai/util/_limit.py +72 -5
- inspect_ai/util/_sandbox/__init__.py +2 -0
- inspect_ai/util/_sandbox/service.py +28 -7
- inspect_ai/util/_subprocess.py +51 -38
- {inspect_ai-0.3.104.dist-info → inspect_ai-0.3.105.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.104.dist-info → inspect_ai-0.3.105.dist-info}/RECORD +38 -37
- {inspect_ai-0.3.104.dist-info → inspect_ai-0.3.105.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.104.dist-info → inspect_ai-0.3.105.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.104.dist-info → inspect_ai-0.3.105.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.104.dist-info → inspect_ai-0.3.105.dist-info}/top_level.txt +0 -0
inspect_ai/_view/www/src/client/api/client-api.ts
CHANGED
@@ -335,6 +335,9 @@ export const clientApi = (api: LogViewAPI, log_file?: string): ClientAPI => {
     ) => {
       return api.download_file(download_file, file_contents);
     },
+    log_message: (log_file: string, message: string) => {
+      return api.log_message(log_file, message);
+    },
     get_log_pending_samples: api.eval_pending_samples
       ? get_log_pending_samples
       : undefined,
inspect_ai/_view/www/src/client/api/jsonrpc.ts
CHANGED
@@ -41,6 +41,7 @@ export const kMethodEvalLogBytes = "eval_log_bytes";
 export const kMethodEvalLogHeaders = "eval_log_headers";
 export const kMethodPendingSamples = "eval_log_pending_samples";
 export const kMethodSampleData = "eval_log_sample_data";
+export const kMethodLogMessage = "log_message";

 export const kJsonRpcParseError = -32700;
 export const kJsonRpcInvalidRequest = -32600;
inspect_ai/_view/www/src/client/api/types.ts
CHANGED
@@ -115,6 +115,7 @@ export interface SampleSummary {
   scores: Scores1;
   error?: string;
   limit?: string;
+  metadata?: Record<string, any>;
   completed?: boolean;
   retries?: number;
 }
@@ -149,6 +150,7 @@ export interface LogViewAPI {
     end: number,
   ) => Promise<Uint8Array>;
   eval_log_headers: (log_files: string[]) => Promise<EvalLog[]>;
+  log_message: (log_file: string, message: string) => Promise<void>;
   download_file: (
     filename: string,
     filecontents: string | Blob | ArrayBuffer | ArrayBufferView,
@@ -177,6 +179,7 @@ export interface ClientAPI {
     id: string | number,
     epoch: number,
   ) => Promise<EvalSample | undefined>;
+  log_message?: (log_file: string, message: string) => Promise<void>;
   download_file: (
     file_name: string,
     file_contents: string | Blob | ArrayBuffer | ArrayBufferView,
inspect_ai/_view/www/src/state/samplePolling.ts
CHANGED
@@ -1,6 +1,7 @@
 import { Event } from "../app/types";
 import {
   AttachmentData,
+  ClientAPI,
   EventData,
   SampleData,
   SampleSummary,
@@ -183,6 +184,8 @@ export function createSamplePolling(
       const processedEvents = processEvents(
         sampleDataResponse.sampleData,
         pollingState,
+        api,
+        logFile,
       );

       // update max attachment id
@@ -268,7 +271,12 @@ function processAttachments(
   });
 }

-function processEvents(sampleData: SampleData, pollingState: PollingState) {
+function processEvents(
+  sampleData: SampleData,
+  pollingState: PollingState,
+  api: ClientAPI,
+  log_file: string,
+) {
   // Go through each event and resolve it, either appending or replacing
   log.debug(`Processing ${sampleData.events.length} events`);
   if (sampleData.events.length === 0) {
@@ -289,6 +297,14 @@ function processEvents(sampleData: SampleData, pollingState: PollingState) {
         attachmentId,
         available_attachments: Object.keys(pollingState.attachments),
       };
+
+      if (api.log_message) {
+        api.log_message(
+          log_file,
+          `Unable to resolve attachment ${attachmentId}\n` +
+            JSON.stringify(snapshot),
+        );
+      }
       console.warn(`Unable to resolve attachment ${attachmentId}`, snapshot);
     },
   );
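Together, the four files above wire a new `log_message` JSON-RPC method through the viewer: `jsonrpc.ts` names the method, `types.ts` declares it on `LogViewAPI` (required) and `ClientAPI` (optional), `client-api.ts` forwards it, and `samplePolling.ts` uses it to report attachments it cannot resolve. A minimal sketch of what a request for this method might look like on the wire, assuming standard JSON-RPC 2.0 framing (the `params` shape and file path are illustrative assumptions, not taken from the diff):

```python
import json

# Hypothetical JSON-RPC 2.0 request for the new "log_message" method.
request = {
    "jsonrpc": "2.0",
    "id": 1,
    "method": "log_message",  # kMethodLogMessage in jsonrpc.ts
    "params": {
        "log_file": "logs/example.eval",  # illustrative path
        "message": "Unable to resolve attachment attachment-123",
    },
}
print(json.dumps(request, indent=2))
```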
inspect_ai/agent/_handoff.py
CHANGED
@@ -6,7 +6,7 @@ from inspect_ai._util.registry import (
     registry_unqualified_name,
     set_registry_info,
 )
-from inspect_ai.tool._tool import Tool, ToolResult, ToolSource
+from inspect_ai.tool._tool import TOOL_PARALLEL, Tool, ToolResult, ToolSource
 from inspect_ai.tool._tool_def import ToolDef
 from inspect_ai.tool._tool_description import ToolDescription, set_tool_description
 from inspect_ai.util._limit import Limit
@@ -61,7 +61,10 @@ def handoff(
         agent, tool_info.name, input_filter, output_filter, limits, **agent_kwargs
     )
     tool_name = tool_name or f"transfer_to_{tool_info.name}"
-    set_registry_info(agent_tool, RegistryInfo(type="tool", name=tool_name))
+    set_registry_info(
+        agent_tool,
+        RegistryInfo(type="tool", name=tool_name, metadata={TOOL_PARALLEL: False}),
+    )
     set_tool_description(
         agent_tool,
         ToolDescription(
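The `metadata={TOOL_PARALLEL: False}` registration marks handoff tools as non-parallel, so the model is not offered the chance to call a handoff alongside other tool calls in the same turn. A usage sketch (the sub-agent definition is illustrative; `handoff` and `react` are the package's own APIs):

```python
from inspect_ai.agent import handoff, react

# illustrative sub-agent
researcher = react(
    name="researcher",
    description="Researches questions in depth.",
    prompt="You are a careful research assistant.",
)

# wraps the agent as a "transfer_to_researcher" tool which, after this
# change, is registered with TOOL_PARALLEL: False (no parallel calls)
researcher_tool = handoff(researcher)
```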
inspect_ai/agent/_react.py
CHANGED
@@ -361,13 +361,13 @@ def _prompt_to_system_message(
             and ("{submit}" not in prompt.assistant_prompt)
             and prompt.submit_prompt
         ):
-            assistant_prompt = f"{prompt.assistant_prompt}\n{prompt.submit_prompt}"
+            assistant_prompt = f"{prompt.assistant_prompt}\n{prompt.submit_prompt.format(submit=submit_tool)}"
         else:
-            assistant_prompt = prompt.assistant_prompt
+            assistant_prompt = prompt.assistant_prompt.format(
+                submit=submit_tool or "submit"
+            )
         prompt_lines.append(assistant_prompt)
-        prompt_content = "\n\n".join(prompt_lines).format(
-            submit=submit_tool or "submit"
-        )
+        prompt_content = "\n\n".join(prompt_lines)
         system_message: ChatMessage | None = ChatMessageSystem(content=prompt_content)
     else:
         system_message = None
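The fix moves `{submit}` substitution from the joined `prompt_content` onto the assistant/submit prompts themselves, so the placeholder is reliably replaced with the actual submit tool name. A toy illustration of the string behavior:

```python
assistant_prompt = "When you are finished, call the {submit}() tool."

# with a custom submit tool name
print(assistant_prompt.format(submit="finish"))
# -> When you are finished, call the finish() tool.

# falling back to the default name when no submit tool is configured
submit_tool = None
print(assistant_prompt.format(submit=submit_tool or "submit"))
# -> When you are finished, call the submit() tool.
```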
inspect_ai/dataset/_dataset.py
CHANGED
inspect_ai/log/_samples.py
CHANGED
@@ -3,6 +3,7 @@ from contextvars import ContextVar
 from datetime import datetime
 from typing import AsyncGenerator, Iterator, Literal

+from anyio.abc import TaskGroup
 from shortuuid import uuid

 from inspect_ai.dataset._dataset import Sample
@@ -28,6 +29,7 @@ class ActiveSample:
         fails_on_error: bool,
         transcript: Transcript,
         sandboxes: dict[str, SandboxConnection],
+        tg: TaskGroup,
     ) -> None:
         self.id = uuid()
         self.started: float | None = None
@@ -47,6 +49,7 @@ class ActiveSample:
         self.transcript = transcript
         self.sandboxes = sandboxes
         self._interrupt_action: Literal["score", "error"] | None = None
+        self.tg = tg

     @property
     def running_time(self) -> float:
@@ -86,6 +89,7 @@ async def active_sample(
     working_limit: int | None,
     fails_on_error: bool,
     transcript: Transcript,
+    tg: TaskGroup,
 ) -> AsyncGenerator[ActiveSample, None]:
     # create the sample
     active = ActiveSample(
@@ -101,6 +105,7 @@ async def active_sample(
         sandboxes=await sandbox_connections(),
         fails_on_error=fails_on_error,
         transcript=transcript,
+        tg=tg,
     )

     _active_samples.append(active)
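Each `ActiveSample` now carries the anyio `TaskGroup` it executes under; this is the plumbing that lets the new `background()` function (see `inspect_ai/util/_background.py` below) schedule work that lives exactly as long as the sample. A minimal sketch of the underlying anyio pattern:

```python
import anyio

async def worker(name: str) -> None:
    # runs until it completes or the enclosing task group is cancelled
    await anyio.sleep(1)
    print(f"{name} done")

async def main() -> None:
    async with anyio.create_task_group() as tg:
        # start_soon() is the same method background() calls on sample.tg
        tg.start_soon(worker, "sample-worker")

anyio.run(main)
```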
inspect_ai/model/_call_tools.py
CHANGED
@@ -534,11 +534,11 @@ def prepend_agent_name(
         content = copy(message.content)
         for i in range(0, len(content)):
             if isinstance(content[i], ContentText):
-
-
-
+                text = cast(ContentText, content[i]).text
+                if text:
+                    content[i] = content[i].model_copy(
+                        update=dict(text=f"[{agent_name}] {text}")
                     )
-                )
                 break
     return message.model_copy(update=dict(content=content))
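The rewritten branch only prefixes the agent name when the first text part is non-empty, avoiding a dangling `[agent] ` prefix on empty content. A self-contained sketch of the behavior, using a pydantic stand-in for `ContentText`:

```python
from pydantic import BaseModel

class ContentText(BaseModel):  # stand-in for inspect_ai's ContentText
    text: str

def prepend_agent_name(content: list[ContentText], agent_name: str) -> list[ContentText]:
    content = list(content)
    for i in range(len(content)):
        if isinstance(content[i], ContentText):
            text = content[i].text
            if text:  # the fix: skip empty text parts
                content[i] = content[i].model_copy(
                    update=dict(text=f"[{agent_name}] {text}")
                )
            break
    return content

print(prepend_agent_name([ContentText(text="hello")], "researcher"))
# [ContentText(text='[researcher] hello')]
```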
inspect_ai/model/_providers/anthropic.py
CHANGED
@@ -41,6 +41,7 @@ from anthropic.types import (
 from anthropic.types.beta import (
     BetaToolComputerUse20250124Param,
     BetaToolTextEditor20241022Param,
+    BetaToolTextEditor20250429Param,
 )
 from pydantic import JsonValue
 from typing_extensions import override
@@ -397,6 +398,9 @@ class AnthropicAPI(ModelAPI):
     def is_claude_3_7(self) -> bool:
         return "claude-3-7-" in self.service_model_name()

+    def is_claude_4(self) -> bool:
+        return re.search(r"claude-4-[a-zA-Z]", self.service_model_name()) is not None
+
     @override
     def connection_key(self) -> str:
         return str(self.api_key)
@@ -627,7 +631,17 @@ class AnthropicAPI(ModelAPI):

     def text_editor_tool_param(
         self, tool: ToolInfo
-    ) ->
+    ) -> (
+        ToolTextEditor20250124Param
+        | BetaToolTextEditor20241022Param
+        | BetaToolTextEditor20250429Param
+        | None
+    ):
+        # See: https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/text-editor-tool#before-using-the-text-editor-tool
+        # TODO: It would be great to enhance our `is_claude_xxx` functions to help here.
+        if self.model_name.startswith(("claude-3-5-haiku", "claude-3-opus")):
+            return None
+
         # check for compatible 'text editor' tool
         if tool.name == "text_editor" and (
             sorted(tool.parameters.properties.keys())
@@ -644,7 +658,11 @@ class AnthropicAPI(ModelAPI):
             )
         ):
             return (
-                BetaToolTextEditor20241022Param(
+                BetaToolTextEditor20250429Param(
+                    type="text_editor_20250429", name="str_replace_based_edit_tool"
+                )
+                if self.is_claude_4()
+                else BetaToolTextEditor20241022Param(
                     type="text_editor_20241022", name="str_replace_editor"
                 )
                 if self.is_claude_3_5()
@@ -706,6 +724,7 @@ ToolParamDef = (
     | BetaToolComputerUse20250124Param
     | ToolTextEditor20250124Param
     | BetaToolTextEditor20241022Param
+    | BetaToolTextEditor20250429Param
     | WebSearchTool20250305Param
 )

@@ -716,6 +735,7 @@ def add_cache_control(
     | BetaToolComputerUse20250124Param
     | ToolTextEditor20250124Param
     | BetaToolTextEditor20241022Param
+    | BetaToolTextEditor20250429Param
     | WebSearchTool20250305Param
     | dict[str, Any],
 ) -> None:
@@ -1008,6 +1028,7 @@ def _names_for_tool_call(
     (INTERNAL_COMPUTER_TOOL_NAME, "computer_20250124", "computer"),
     ("str_replace_editor", "text_editor_20241022", "text_editor"),
     ("str_replace_editor", "text_editor_20250124", "text_editor"),
+    ("str_replace_based_edit_tool", "text_editor_20250429", "text_editor"),
     ("bash", "bash_20250124", "bash_session"),
 )
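The net effect is a model-to-text-editor mapping: Claude 4 models get the new `text_editor_20250429` tool (named `str_replace_based_edit_tool`), Claude 3.5 keeps `text_editor_20241022`, other models keep `text_editor_20250124`, and 3.5 Haiku / 3 Opus get no text editor at all. A summary sketch of that decision logic (the Claude 3.5 check here is an assumption; the Claude 4 regex is the one the diff adds):

```python
import re

def is_claude_4(model: str) -> bool:
    # same regex the diff adds to AnthropicAPI.is_claude_4()
    return re.search(r"claude-4-[a-zA-Z]", model) is not None

def text_editor_for(model: str) -> tuple[str, str] | None:
    """Map a model name to (tool type, tool name), or None if unsupported."""
    if model.startswith(("claude-3-5-haiku", "claude-3-opus")):
        return None
    if is_claude_4(model):
        return ("text_editor_20250429", "str_replace_based_edit_tool")
    if "claude-3-5-" in model:  # assumed shape of is_claude_3_5()
        return ("text_editor_20241022", "str_replace_editor")
    return ("text_editor_20250124", "str_replace_editor")

print(text_editor_for("claude-4-sonnet"))  # ('text_editor_20250429', 'str_replace_based_edit_tool')
print(text_editor_for("claude-3-opus"))    # None
```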
inspect_ai/model/_providers/google.py
CHANGED
@@ -991,6 +991,10 @@ def _combine_text_parts(acc: list[Part], part: Part) -> list[Part]:
     """Combine adjacent text parts into a single part."""
     return (
         acc + [part]
-        if part.text is None
+        if part.text is None
+        or part.thought is True
+        or len(acc) == 0
+        or acc[-1].text is None
+        or acc[-1].thought is True
         else acc[:-1] + [Part(text=acc[-1].text + part.text)]
     )
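`_combine_text_parts` is a reduce step that merges adjacent plain-text parts; the fix stops it from merging across thought parts (and from indexing into an empty accumulator). A runnable sketch with a dataclass stand-in for the SDK's `Part`:

```python
from dataclasses import dataclass
from functools import reduce

@dataclass
class Part:  # stand-in for the google-genai Part type
    text: str | None = None
    thought: bool | None = None

def combine_text_parts(acc: list[Part], part: Part) -> list[Part]:
    """Combine adjacent non-thought text parts into a single part."""
    return (
        acc + [part]
        if part.text is None
        or part.thought is True
        or len(acc) == 0
        or acc[-1].text is None
        or acc[-1].thought is True
        else acc[:-1] + [Part(text=acc[-1].text + part.text)]
    )

parts = [Part(text="planning...", thought=True), Part(text="Hello, "), Part(text="world")]
print(reduce(combine_text_parts, parts, []))
# the thought part stays separate; the two plain text parts merge into one
```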
inspect_ai/util/__init__.py
CHANGED
@@ -4,13 +4,16 @@ from inspect_ai.util._limit import (
     Limit,
     LimitExceededError,
     LimitScope,
+    SampleLimits,
     apply_limits,
     message_limit,
+    sample_limits,
     time_limit,
     token_limit,
     working_limit,
 )

+from ._background import background
 from ._collect import collect
 from ._concurrency import concurrency
 from ._console import input_screen
@@ -29,6 +32,7 @@ from ._sandbox import (
     SandboxEnvironmentType,
     sandbox,
     sandbox_default,
+    sandbox_service,
     sandbox_with,
     sandboxenv,
 )
@@ -44,6 +48,8 @@ from ._throttle import throttle

 __all__ = [
     "apply_limits",
+    "sample_limits",
+    "SampleLimits",
     "ExecResult",
     "concurrency",
     "DisplayType",
@@ -73,6 +79,7 @@ __all__ = [
     "sandbox",
     "sandbox_with",
     "sandbox_default",
+    "sandbox_service",
     "Store",
     "store",
     "StoreModel",
@@ -82,6 +89,7 @@ __all__ = [
     "Subtask",
     "subtask",
     "throttle",
+    "background",
     "token_limit",
     "time_limit",
     "working_limit",
inspect_ai/util/_background.py
ADDED
@@ -0,0 +1,64 @@
+import sys
+from logging import getLogger
+from typing import Any, Awaitable, Callable
+
+if sys.version_info >= (3, 11):
+    from typing import TypeVarTuple
+else:
+    from typing_extensions import TypeVarTuple
+
+
+from typing_extensions import Unpack
+
+logger = getLogger(__name__)
+
+
+PosArgsT = TypeVarTuple("PosArgsT")
+
+
+def background(
+    func: Callable[[Unpack[PosArgsT]], Awaitable[Any]],
+    *args: Unpack[PosArgsT],
+) -> None:
+    """Run an async function in the background of the current sample.
+
+    Background functions must be run from an executing sample.
+    The function will run as long as the current sample is running.
+
+    When the sample terminates, an anyio cancelled error will be
+    raised in the background function. To catch this error and
+    cleanup:
+
+    ```python
+    import anyio
+
+    async def run():
+        try:
+            # background code
+        except anyio.get_cancelled_exc_class():
+            ...
+    ```
+
+    Args:
+       func: Async function to run
+       *args: Optional function arguments.
+    """
+    from inspect_ai.log._samples import sample_active
+
+    # get the active sample
+    sample = sample_active()
+    if sample is None:
+        raise RuntimeError(
+            "background() function must be called from a running sample."
+        )
+
+    # handle and log background exceptions
+    async def run() -> None:
+        try:
+            await func(*args)
+        except Exception as ex:
+            logger.error(f"Background worker error: {ex}")
+            raise
+
+    # kick it off
+    sample.tg.start_soon(run)
inspect_ai/util/_limit.py
CHANGED
@@ -4,6 +4,7 @@ import abc
 import logging
 from contextlib import ExitStack, contextmanager
 from contextvars import ContextVar
+from dataclasses import dataclass
 from types import TracebackType
 from typing import TYPE_CHECKING, Generic, Iterator, Literal, TypeVar

@@ -88,12 +89,31 @@ class Limit(abc.ABC):
     ) -> None:
         pass

+    @property
+    @abc.abstractmethod
+    def limit(self) -> float | None:
+        """The value of the limit being applied.
+
+        Can be None which represents no limit.
+        """
+        pass
+
     @property
     @abc.abstractmethod
     def usage(self) -> float:
         """The current usage of the resource being limited."""
         pass

+    @property
+    def remaining(self) -> float | None:
+        """The remaining "unused" amount of the resource being limited.
+
+        Returns None if the limit is None.
+        """
+        if self.limit is None:
+            return None
+        return self.limit - self.usage
+
     def _check_reuse(self) -> None:
         if self._entered:
             raise RuntimeError(
@@ -152,6 +172,46 @@ class LimitScope:
         self.limit_error: LimitExceededError | None = None


+@dataclass
+class SampleLimits:
+    """Data class to hold the limits applied to a Sample.
+
+    This is used to return the limits from `sample_limits()`.
+    """
+
+    token: Limit
+    """Token limit."""
+
+    message: Limit
+    """Message limit."""
+
+    working: Limit
+    """Working limit."""
+
+    time: Limit
+    """Time limit."""
+
+
+def sample_limits() -> SampleLimits:
+    """Get the top-level limits applied to the current `Sample`."""
+
+    def get_root_node(node: TNode | None, name: str) -> TNode:
+        if node is None:
+            raise RuntimeError(
+                f"No {name} limit node found. Is there a running sample?"
+            )
+        while node.parent is not None:
+            node = node.parent
+        return node
+
+    return SampleLimits(
+        token=get_root_node(token_limit_tree.get(), "token"),
+        message=get_root_node(message_limit_tree.get(), "message"),
+        working=get_root_node(working_limit_tree.get(), "working"),
+        time=get_root_node(time_limit_tree.get(), "time"),
+    )
+
+
 def token_limit(limit: int | None) -> _TokenLimit:
     """Limits the total number of tokens which can be used.

@@ -319,10 +379,9 @@ class _Tree(Generic[TNode]):


 token_limit_tree: _Tree[_TokenLimit] = _Tree("token_limit_tree")
-# Store the message limit leaf node so that we know which limit to check in
-# check_message_limit().
 message_limit_tree: _Tree[_MessageLimit] = _Tree("message_limit_tree")
 working_limit_tree: _Tree[_WorkingLimit] = _Tree("working_limit_tree")
+time_limit_tree: _Tree[_TimeLimit] = _Tree("time_limit_tree")


 class _Node:
@@ -497,7 +556,7 @@ class _MessageLimit(Limit, _Node):
     )


-class _TimeLimit(Limit):
+class _TimeLimit(Limit, _Node):
     def __init__(self, limit: float | None) -> None:
         super().__init__()
         _validate_time_limit("Time", limit)
@@ -507,8 +566,7 @@ class _TimeLimit(Limit):

     def __enter__(self) -> Limit:
         super()._check_reuse()
-
-        # of the state.
+        time_limit_tree.push(self)
         self._cancel_scope = anyio.move_on_after(self._limit)
         self._cancel_scope.__enter__()
         self._start_time = anyio.current_time()
@@ -524,6 +582,7 @@ class _TimeLimit(Limit):

         self._cancel_scope.__exit__(exc_type, exc_val, exc_tb)
         self._end_time = anyio.current_time()
+        self._pop_and_check_identity(time_limit_tree)
         if self._cancel_scope.cancel_called and self._limit is not None:
             message = f"Time limit exceeded. limit: {self._limit} seconds"
             assert self._start_time is not None
@@ -541,6 +600,10 @@ class _TimeLimit(Limit):
             source=self,
         ) from exc_val

+    @property
+    def limit(self) -> float | None:
+        return self._limit
+
     @property
     def usage(self) -> float:
         if self._start_time is None:
@@ -575,6 +638,10 @@ class _WorkingLimit(Limit, _Node):
         self._end_time = anyio.current_time()
         self._pop_and_check_identity(working_limit_tree)

+    @property
+    def limit(self) -> float | None:
+        return self._limit
+
     @property
     def usage(self) -> float:
         if self._start_time is None:
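With `limit` now part of the `Limit` interface (and `remaining` derived from it), the new `sample_limits()` accessor exposes a sample's top-level limits for inspection. A sketch, callable from inside a running sample (printed values are illustrative):

```python
from inspect_ai.util import sample_limits

limits = sample_limits()
print(limits.token.limit)      # e.g. 500000, or None if no token limit set
print(limits.token.usage)      # tokens consumed so far
print(limits.token.remaining)  # None when limit is None, else limit - usage
print(limits.time.remaining)   # seconds left under the time limit
```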
inspect_ai/util/_sandbox/__init__.py
CHANGED
@@ -13,6 +13,7 @@ from .environment import (
 from .limits import OutputLimitExceededError, SandboxEnvironmentLimits
 from .local import LocalSandboxEnvironment  # noqa: F401
 from .registry import sandboxenv
+from .service import sandbox_service

 __all__ = [
     "OutputLimitExceededError",
@@ -27,4 +28,5 @@ __all__ = [
     "sandbox",
     "sandbox_with",
     "sandbox_default",
+    "sandbox_service",
 ]
inspect_ai/util/_sandbox/service.py
CHANGED
@@ -44,14 +44,35 @@ async def sandbox_service(
 ) -> None:
     """Run a service that is callable from within a sandbox.

+    The service makes available a set of methods to a sandbox
+    for calling back into the main Inspect process.
+
+    To use the service from within a sandbox, either add it to the sys path
+    or use importlib. For example, if the service is named 'foo':
+
+    ```python
+    import sys
+    sys.path.append("/var/tmp/sandbox-services/foo")
+    import foo
+    ```
+
+    Or:
+
+    ```python
+    import importlib.util
+    spec = importlib.util.spec_from_file_location(
+        "foo", "/var/tmp/sandbox-services/foo/foo.py"
+    )
+    foo = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(foo)
+    ```
+
     Args:
-      name
-      methods
-      until
-
-
-      user (str | None): User to login as. Defaults to the sandbox environment's
-        default user.
+      name: Service name
+      methods: Service methods.
+      until: Function used to check whether the service should stop.
+      sandbox: Sandbox to publish service to.
+      user: User to login as. Defaults to the sandbox environment's default user.
     """
     # setup and start service
     service = SandboxService(name, sandbox, user)