inspect-ai 0.3.75__py3-none-any.whl → 0.3.77__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +16 -0
- inspect_ai/_display/core/results.py +6 -1
- inspect_ai/_eval/eval.py +8 -1
- inspect_ai/_eval/evalset.py +6 -2
- inspect_ai/_eval/registry.py +3 -5
- inspect_ai/_eval/run.py +7 -2
- inspect_ai/_eval/task/run.py +4 -0
- inspect_ai/_util/content.py +3 -0
- inspect_ai/_util/logger.py +3 -0
- inspect_ai/_view/www/dist/assets/index.css +28 -16
- inspect_ai/_view/www/dist/assets/index.js +4811 -4609
- inspect_ai/_view/www/log-schema.json +79 -9
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +22 -4
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +1 -1
- inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +2 -2
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +1 -1
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +2 -2
- inspect_ai/_view/www/src/types/log.d.ts +11 -5
- inspect_ai/log/_recorders/json.py +8 -0
- inspect_ai/log/_transcript.py +13 -4
- inspect_ai/model/_call_tools.py +13 -4
- inspect_ai/model/_chat_message.py +3 -0
- inspect_ai/model/_model.py +5 -1
- inspect_ai/model/_model_output.py +6 -1
- inspect_ai/model/_openai.py +78 -10
- inspect_ai/model/_openai_responses.py +277 -0
- inspect_ai/model/_providers/anthropic.py +134 -75
- inspect_ai/model/_providers/azureai.py +2 -2
- inspect_ai/model/_providers/mistral.py +29 -13
- inspect_ai/model/_providers/openai.py +64 -57
- inspect_ai/model/_providers/openai_responses.py +177 -0
- inspect_ai/model/_providers/openrouter.py +52 -2
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/model/_providers/vertex.py +5 -2
- inspect_ai/tool/__init__.py +6 -0
- inspect_ai/tool/_tool.py +23 -3
- inspect_ai/tool/_tool_call.py +5 -2
- inspect_ai/tool/_tool_support_helpers.py +200 -0
- inspect_ai/tool/_tools/_bash_session.py +119 -0
- inspect_ai/tool/_tools/_computer/_computer.py +1 -1
- inspect_ai/tool/_tools/_text_editor.py +121 -0
- inspect_ai/tool/_tools/_think.py +48 -0
- inspect_ai/tool/_tools/_web_browser/_back_compat.py +150 -0
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +75 -130
- inspect_ai/tool/_tools/_web_search.py +1 -1
- inspect_ai/util/_json.py +28 -0
- inspect_ai/util/_sandbox/context.py +16 -7
- inspect_ai/util/_sandbox/docker/config.py +1 -1
- inspect_ai/util/_sandbox/docker/internal.py +3 -3
- {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.77.dist-info}/METADATA +5 -2
- {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.77.dist-info}/RECORD +56 -80
- {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.77.dist-info}/WHEEL +1 -1
- inspect_ai/model/_image.py +0 -15
- inspect_ai/tool/_tools/_web_browser/_resources/.pylintrc +0 -8
- inspect_ai/tool/_tools/_web_browser/_resources/.vscode/launch.json +0 -24
- inspect_ai/tool/_tools/_web_browser/_resources/.vscode/settings.json +0 -25
- inspect_ai/tool/_tools/_web_browser/_resources/Dockerfile +0 -22
- inspect_ai/tool/_tools/_web_browser/_resources/README.md +0 -63
- inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree.py +0 -71
- inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree_node.py +0 -323
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/__init__.py +0 -5
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/a11y.py +0 -279
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom.py +0 -9
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom_snapshot.py +0 -293
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/page.py +0 -94
- inspect_ai/tool/_tools/_web_browser/_resources/constants.py +0 -2
- inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.svg +0 -2
- inspect_ai/tool/_tools/_web_browser/_resources/mock_environment.py +0 -45
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_browser.py +0 -50
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_crawler.py +0 -48
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_page_crawler.py +0 -280
- inspect_ai/tool/_tools/_web_browser/_resources/pyproject.toml +0 -65
- inspect_ai/tool/_tools/_web_browser/_resources/rectangle.py +0 -64
- inspect_ai/tool/_tools/_web_browser/_resources/rpc_client_helpers.py +0 -146
- inspect_ai/tool/_tools/_web_browser/_resources/scale_factor.py +0 -64
- inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_tree_node.py +0 -180
- inspect_ai/tool/_tools/_web_browser/_resources/test_playwright_crawler.py +0 -99
- inspect_ai/tool/_tools/_web_browser/_resources/test_rectangle.py +0 -15
- inspect_ai/tool/_tools/_web_browser/_resources/test_web_client.py +0 -44
- inspect_ai/tool/_tools/_web_browser/_resources/web_browser_rpc_types.py +0 -39
- inspect_ai/tool/_tools/_web_browser/_resources/web_client.py +0 -214
- inspect_ai/tool/_tools/_web_browser/_resources/web_client_new_session.py +0 -35
- inspect_ai/tool/_tools/_web_browser/_resources/web_server.py +0 -192
- {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.77.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.77.dist-info/licenses}/LICENSE +0 -0
- {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.77.dist-info}/top_level.txt +0 -0
@@ -200,8 +200,16 @@
|
|
200
200
|
"description": "Assistant chat message.",
|
201
201
|
"properties": {
|
202
202
|
"id": {
|
203
|
-
"
|
204
|
-
|
203
|
+
"anyOf": [
|
204
|
+
{
|
205
|
+
"type": "string"
|
206
|
+
},
|
207
|
+
{
|
208
|
+
"type": "null"
|
209
|
+
}
|
210
|
+
],
|
211
|
+
"default": null,
|
212
|
+
"title": "Id"
|
205
213
|
},
|
206
214
|
"content": {
|
207
215
|
"anyOf": [
|
@@ -286,8 +294,16 @@
|
|
286
294
|
"description": "System chat message.",
|
287
295
|
"properties": {
|
288
296
|
"id": {
|
289
|
-
"
|
290
|
-
|
297
|
+
"anyOf": [
|
298
|
+
{
|
299
|
+
"type": "string"
|
300
|
+
},
|
301
|
+
{
|
302
|
+
"type": "null"
|
303
|
+
}
|
304
|
+
],
|
305
|
+
"default": null,
|
306
|
+
"title": "Id"
|
291
307
|
},
|
292
308
|
"content": {
|
293
309
|
"anyOf": [
|
@@ -356,8 +372,16 @@
|
|
356
372
|
"description": "Tool chat message.",
|
357
373
|
"properties": {
|
358
374
|
"id": {
|
359
|
-
"
|
360
|
-
|
375
|
+
"anyOf": [
|
376
|
+
{
|
377
|
+
"type": "string"
|
378
|
+
},
|
379
|
+
{
|
380
|
+
"type": "null"
|
381
|
+
}
|
382
|
+
],
|
383
|
+
"default": null,
|
384
|
+
"title": "Id"
|
361
385
|
},
|
362
386
|
"content": {
|
363
387
|
"anyOf": [
|
@@ -435,6 +459,18 @@
|
|
435
459
|
"default": null,
|
436
460
|
"title": "Function"
|
437
461
|
},
|
462
|
+
"internal_name": {
|
463
|
+
"anyOf": [
|
464
|
+
{
|
465
|
+
"type": "string"
|
466
|
+
},
|
467
|
+
{
|
468
|
+
"type": "null"
|
469
|
+
}
|
470
|
+
],
|
471
|
+
"default": null,
|
472
|
+
"title": "Internal Name"
|
473
|
+
},
|
438
474
|
"error": {
|
439
475
|
"anyOf": [
|
440
476
|
{
|
@@ -454,6 +490,7 @@
|
|
454
490
|
"role",
|
455
491
|
"tool_call_id",
|
456
492
|
"function",
|
493
|
+
"internal_name",
|
457
494
|
"error"
|
458
495
|
],
|
459
496
|
"title": "ChatMessageTool",
|
@@ -464,8 +501,16 @@
|
|
464
501
|
"description": "User chat message.",
|
465
502
|
"properties": {
|
466
503
|
"id": {
|
467
|
-
"
|
468
|
-
|
504
|
+
"anyOf": [
|
505
|
+
{
|
506
|
+
"type": "string"
|
507
|
+
},
|
508
|
+
{
|
509
|
+
"type": "null"
|
510
|
+
}
|
511
|
+
],
|
512
|
+
"default": null,
|
513
|
+
"title": "Id"
|
469
514
|
},
|
470
515
|
"content": {
|
471
516
|
"anyOf": [
|
@@ -4431,10 +4476,21 @@
|
|
4431
4476
|
"type": "object"
|
4432
4477
|
},
|
4433
4478
|
"type": {
|
4434
|
-
"const": "function",
|
4435
4479
|
"title": "Type",
|
4436
4480
|
"type": "string"
|
4437
4481
|
},
|
4482
|
+
"internal_name": {
|
4483
|
+
"anyOf": [
|
4484
|
+
{
|
4485
|
+
"type": "string"
|
4486
|
+
},
|
4487
|
+
{
|
4488
|
+
"type": "null"
|
4489
|
+
}
|
4490
|
+
],
|
4491
|
+
"default": null,
|
4492
|
+
"title": "Internal Name"
|
4493
|
+
},
|
4438
4494
|
"parse_error": {
|
4439
4495
|
"anyOf": [
|
4440
4496
|
{
|
@@ -4464,6 +4520,7 @@
|
|
4464
4520
|
"function",
|
4465
4521
|
"arguments",
|
4466
4522
|
"type",
|
4523
|
+
"internal_name",
|
4467
4524
|
"parse_error",
|
4468
4525
|
"view"
|
4469
4526
|
],
|
@@ -4623,6 +4680,18 @@
|
|
4623
4680
|
"title": "Arguments",
|
4624
4681
|
"type": "object"
|
4625
4682
|
},
|
4683
|
+
"internal_name": {
|
4684
|
+
"anyOf": [
|
4685
|
+
{
|
4686
|
+
"type": "string"
|
4687
|
+
},
|
4688
|
+
{
|
4689
|
+
"type": "null"
|
4690
|
+
}
|
4691
|
+
],
|
4692
|
+
"default": null,
|
4693
|
+
"title": "Internal Name"
|
4694
|
+
},
|
4626
4695
|
"view": {
|
4627
4696
|
"anyOf": [
|
4628
4697
|
{
|
@@ -4809,6 +4878,7 @@
|
|
4809
4878
|
"id",
|
4810
4879
|
"function",
|
4811
4880
|
"arguments",
|
4881
|
+
"internal_name",
|
4812
4882
|
"view",
|
4813
4883
|
"result",
|
4814
4884
|
"truncated",
|
@@ -83,8 +83,24 @@ export const ToolCallView: FC<ToolCallViewProps> = ({
|
|
83
83
|
: !isContentImage(output);
|
84
84
|
const normalizedContent = useMemo(() => normalizeContent(output), [output]);
|
85
85
|
|
86
|
-
const
|
86
|
+
const hasContent = normalizedContent.find((c) => {
|
87
|
+
if (c.type === "tool") {
|
88
|
+
for (const t of c.content) {
|
89
|
+
if (t.type === "text") {
|
90
|
+
if (t.text) {
|
91
|
+
return true;
|
92
|
+
}
|
93
|
+
} else {
|
94
|
+
return true;
|
95
|
+
}
|
96
|
+
}
|
97
|
+
return false;
|
98
|
+
} else {
|
99
|
+
return true;
|
100
|
+
}
|
101
|
+
});
|
87
102
|
|
103
|
+
const contents = mode !== "compact" ? input : input || functionCall;
|
88
104
|
return (
|
89
105
|
<div>
|
90
106
|
{mode !== "compact" && (!view || view.title) ? (
|
@@ -99,9 +115,11 @@ export const ToolCallView: FC<ToolCallViewProps> = ({
|
|
99
115
|
contents={contents}
|
100
116
|
toolCallView={view}
|
101
117
|
/>
|
102
|
-
|
103
|
-
<
|
104
|
-
|
118
|
+
{hasContent ? (
|
119
|
+
<ExpandablePanel collapse={collapse} border={true} lines={15}>
|
120
|
+
<MessageContent contents={normalizedContent} />
|
121
|
+
</ExpandablePanel>
|
122
|
+
) : undefined}
|
105
123
|
</div>
|
106
124
|
</div>
|
107
125
|
</div>
|
@@ -9,7 +9,7 @@ export const categoricalScoreDescriptor = (
|
|
9
9
|
scoreType: kScoreTypeCategorical,
|
10
10
|
categories: values,
|
11
11
|
compare: (a, b) => {
|
12
|
-
return String(a).localeCompare(String(b));
|
12
|
+
return String(a.value).localeCompare(String(b.value));
|
13
13
|
},
|
14
14
|
render: (score) => {
|
15
15
|
return String(score);
|
@@ -13,8 +13,8 @@ export const numericScoreDescriptor = (values: Value2[]): ScoreDescriptor => {
|
|
13
13
|
min: Math.min(...onlyNumeric),
|
14
14
|
max: Math.max(...onlyNumeric),
|
15
15
|
compare: (a, b) => {
|
16
|
-
if (typeof a === "number" && typeof b === "number") {
|
17
|
-
return a - b;
|
16
|
+
if (typeof a.value === "number" && typeof b.value === "number") {
|
17
|
+
return a.value - b.value;
|
18
18
|
} else {
|
19
19
|
console.warn("Comparing non-numerics using a numeric score descriptor");
|
20
20
|
return 0;
|
@@ -148,7 +148,7 @@ export type Input =
|
|
148
148
|
| ChatMessageAssistant
|
149
149
|
| ChatMessageTool
|
150
150
|
)[];
|
151
|
-
export type Id1 = string;
|
151
|
+
export type Id1 = string | null;
|
152
152
|
export type Content =
|
153
153
|
| string
|
154
154
|
| (
|
@@ -175,7 +175,7 @@ export type Video = string;
|
|
175
175
|
export type Format1 = "mp4" | "mpeg" | "mov";
|
176
176
|
export type Source = ("input" | "generate") | null;
|
177
177
|
export type Role = "system";
|
178
|
-
export type Id2 = string;
|
178
|
+
export type Id2 = string | null;
|
179
179
|
export type Content1 =
|
180
180
|
| string
|
181
181
|
| (
|
@@ -188,7 +188,7 @@ export type Content1 =
|
|
188
188
|
export type Source1 = ("input" | "generate") | null;
|
189
189
|
export type Role1 = "user";
|
190
190
|
export type ToolCallId = string[] | null;
|
191
|
-
export type Id3 = string;
|
191
|
+
export type Id3 = string | null;
|
192
192
|
export type Content2 =
|
193
193
|
| string
|
194
194
|
| (
|
@@ -203,12 +203,13 @@ export type Role2 = "assistant";
|
|
203
203
|
export type ToolCalls = ToolCall[] | null;
|
204
204
|
export type Id4 = string;
|
205
205
|
export type Function = string;
|
206
|
-
export type Type8 =
|
206
|
+
export type Type8 = string;
|
207
|
+
export type InternalName = string | null;
|
207
208
|
export type ParseError = string | null;
|
208
209
|
export type Title = string | null;
|
209
210
|
export type Format2 = "text" | "markdown";
|
210
211
|
export type Content3 = string;
|
211
|
-
export type Id5 = string;
|
212
|
+
export type Id5 = string | null;
|
212
213
|
export type Content4 =
|
213
214
|
| string
|
214
215
|
| (
|
@@ -222,6 +223,7 @@ export type Source3 = ("input" | "generate") | null;
|
|
222
223
|
export type Role3 = "tool";
|
223
224
|
export type ToolCallId1 = string | null;
|
224
225
|
export type Function1 = string | null;
|
226
|
+
export type InternalName1 = string | null;
|
225
227
|
export type Type9 =
|
226
228
|
| "parsing"
|
227
229
|
| "timeout"
|
@@ -369,6 +371,7 @@ export type Event6 = "tool";
|
|
369
371
|
export type Type12 = "function";
|
370
372
|
export type Id7 = string;
|
371
373
|
export type Function2 = string;
|
374
|
+
export type InternalName2 = string | null;
|
372
375
|
export type Result1 =
|
373
376
|
| string
|
374
377
|
| number
|
@@ -911,6 +914,7 @@ export interface ToolCall {
|
|
911
914
|
function: Function;
|
912
915
|
arguments: Arguments;
|
913
916
|
type: Type8;
|
917
|
+
internal_name: InternalName;
|
914
918
|
parse_error: ParseError;
|
915
919
|
view: ToolCallContent | null;
|
916
920
|
}
|
@@ -933,6 +937,7 @@ export interface ChatMessageTool {
|
|
933
937
|
role: Role3;
|
934
938
|
tool_call_id: ToolCallId1;
|
935
939
|
function: Function1;
|
940
|
+
internal_name: InternalName1;
|
936
941
|
error: ToolCallError | null;
|
937
942
|
}
|
938
943
|
export interface ToolCallError {
|
@@ -1201,6 +1206,7 @@ export interface ToolEvent {
|
|
1201
1206
|
id: Id7;
|
1202
1207
|
function: Function2;
|
1203
1208
|
arguments: Arguments1;
|
1209
|
+
internal_name: InternalName2;
|
1204
1210
|
view: ToolCallContent | null;
|
1205
1211
|
result: Result1;
|
1206
1212
|
truncated: Truncated;
|
@@ -217,6 +217,11 @@ def _read_header_streaming(log_file: str) -> EvalLog:
|
|
217
217
|
|
218
218
|
# Parse the log file, stopping before parsing samples
|
219
219
|
status: Literal["started", "success", "cancelled", "error"] | None = None
|
220
|
+
eval: EvalSpec | None = None
|
221
|
+
plan: EvalPlan | None = None
|
222
|
+
results: EvalResults | None = None
|
223
|
+
stats: EvalStats | None = None
|
224
|
+
error: EvalError | None = None
|
220
225
|
for k, v in ijson.kvitems(f, ""):
|
221
226
|
if k == "status":
|
222
227
|
assert v in get_args(
|
@@ -239,6 +244,9 @@ def _read_header_streaming(log_file: str) -> EvalLog:
|
|
239
244
|
break
|
240
245
|
|
241
246
|
assert status, "Must encounter a 'status'"
|
247
|
+
assert eval, "Must encounter a 'eval'"
|
248
|
+
assert plan, "Must encounter a 'plan'"
|
249
|
+
assert stats, "Must encounter a 'stats'"
|
242
250
|
|
243
251
|
return EvalLog(
|
244
252
|
eval=eval,
|
inspect_ai/log/_transcript.py
CHANGED
@@ -146,7 +146,7 @@ class ModelEvent(BaseEvent):
|
|
146
146
|
"""working time for model call that succeeded (i.e. was not retried)."""
|
147
147
|
|
148
148
|
@field_serializer("completed")
|
149
|
-
def serialize_completed(self, dt: datetime) -> str:
|
149
|
+
def serialize_completed(self, dt: datetime | None) -> str | None:
|
150
150
|
if dt is None:
|
151
151
|
return None
|
152
152
|
return dt.astimezone().isoformat()
|
@@ -170,6 +170,9 @@ class ToolEvent(BaseEvent):
|
|
170
170
|
arguments: dict[str, JsonValue]
|
171
171
|
"""Arguments to function."""
|
172
172
|
|
173
|
+
internal_name: str | None = Field(default=None)
|
174
|
+
"""Internal name for tool (if any)."""
|
175
|
+
|
173
176
|
view: ToolCallContent | None = Field(default=None)
|
174
177
|
"""Custom view of tool call input."""
|
175
178
|
|
@@ -235,7 +238,9 @@ class ToolEvent(BaseEvent):
|
|
235
238
|
"""Required so that we can include '_cancel_fn' as a member."""
|
236
239
|
|
237
240
|
@field_serializer("completed")
|
238
|
-
def serialize_completed(self, dt: datetime) -> str:
|
241
|
+
def serialize_completed(self, dt: datetime | None) -> str | None:
|
242
|
+
if dt is None:
|
243
|
+
return None
|
239
244
|
return dt.astimezone().isoformat()
|
240
245
|
|
241
246
|
|
@@ -270,7 +275,9 @@ class SandboxEvent(BaseEvent):
|
|
270
275
|
"""Time that sandbox action completed (see `timestamp` for started)"""
|
271
276
|
|
272
277
|
@field_serializer("completed")
|
273
|
-
def serialize_completed(self, dt: datetime) -> str:
|
278
|
+
def serialize_completed(self, dt: datetime | None) -> str | None:
|
279
|
+
if dt is None:
|
280
|
+
return None
|
274
281
|
return dt.astimezone().isoformat()
|
275
282
|
|
276
283
|
|
@@ -412,7 +419,9 @@ class SubtaskEvent(BaseEvent):
|
|
412
419
|
"""Working time for subtask (i.e. time not spent waiting on semaphores or model retries)."""
|
413
420
|
|
414
421
|
@field_serializer("completed")
|
415
|
-
def serialize_completed(self, dt: datetime) -> str:
|
422
|
+
def serialize_completed(self, dt: datetime | None) -> str | None:
|
423
|
+
if dt is None:
|
424
|
+
return None
|
416
425
|
return dt.astimezone().isoformat()
|
417
426
|
|
418
427
|
|
inspect_ai/model/_call_tools.py
CHANGED
@@ -25,7 +25,6 @@ from typing import (
|
|
25
25
|
if sys.version_info < (3, 11):
|
26
26
|
from exceptiongroup import ExceptionGroup
|
27
27
|
|
28
|
-
|
29
28
|
import anyio
|
30
29
|
import yaml
|
31
30
|
from anyio.streams.memory import MemoryObjectSendStream
|
@@ -168,6 +167,7 @@ async def call_tools(
|
|
168
167
|
id=call.id,
|
169
168
|
function=call.function,
|
170
169
|
arguments=call.arguments,
|
170
|
+
internal_name=call.internal_name,
|
171
171
|
result=content,
|
172
172
|
truncated=truncated,
|
173
173
|
view=call.view,
|
@@ -183,6 +183,7 @@ async def call_tools(
|
|
183
183
|
content=content,
|
184
184
|
tool_call_id=call.id,
|
185
185
|
function=call.function,
|
186
|
+
internal_name=call.internal_name,
|
186
187
|
error=tool_error,
|
187
188
|
),
|
188
189
|
event,
|
@@ -201,6 +202,7 @@ async def call_tools(
|
|
201
202
|
id=call.id,
|
202
203
|
function=call.function,
|
203
204
|
arguments=call.arguments,
|
205
|
+
internal_name=call.internal_name,
|
204
206
|
view=call.view,
|
205
207
|
pending=True,
|
206
208
|
)
|
@@ -216,9 +218,7 @@ async def call_tools(
|
|
216
218
|
tg.start_soon(call_tool_task, call, send_stream)
|
217
219
|
event._set_cancel_fn(tg.cancel_scope.cancel)
|
218
220
|
async with receive_stream:
|
219
|
-
|
220
|
-
tool_message, result_event = result
|
221
|
-
break
|
221
|
+
tool_message, result_event = await receive_stream.receive()
|
222
222
|
except ExceptionGroup as ex:
|
223
223
|
raise ex.exceptions[0]
|
224
224
|
|
@@ -226,6 +226,7 @@ async def call_tools(
|
|
226
226
|
tool_message = ChatMessageTool(
|
227
227
|
content="",
|
228
228
|
function=call.function,
|
229
|
+
internal_name=call.internal_name,
|
229
230
|
tool_call_id=call.id,
|
230
231
|
error=ToolCallError(
|
231
232
|
"timeout", "Command timed out before completing."
|
@@ -235,6 +236,7 @@ async def call_tools(
|
|
235
236
|
id=call.id,
|
236
237
|
function=call.function,
|
237
238
|
arguments=call.arguments,
|
239
|
+
internal_name=call.internal_name,
|
238
240
|
result=tool_message.content,
|
239
241
|
truncated=None,
|
240
242
|
view=call.view,
|
@@ -508,6 +510,13 @@ def tool_parse_error_message(arguments: str, ex: Exception) -> str:
|
|
508
510
|
def parse_tool_call(
|
509
511
|
id: str, function: str, arguments: str, tools: list[ToolInfo] | None = None
|
510
512
|
) -> ToolCall:
|
513
|
+
"""Parse a tool call from a JSON payload.
|
514
|
+
|
515
|
+
Note that this function doesn't know about internal tool names so the caller
|
516
|
+
should amend the returned `ToolCall` by mapping the parsed `function` field
|
517
|
+
from an internal name to an inspect tool name and fixing up the `ToolCall` object
|
518
|
+
as required to reflect this change.
|
519
|
+
"""
|
511
520
|
error: str | None = None
|
512
521
|
arguments_dict: dict[str, Any] = {}
|
513
522
|
|
@@ -158,6 +158,9 @@ class ChatMessageTool(ChatMessageBase):
|
|
158
158
|
function: str | None = Field(default=None)
|
159
159
|
"""Name of function called."""
|
160
160
|
|
161
|
+
internal_name: str | None = Field(default=None)
|
162
|
+
"""Internal name for tool (if any)."""
|
163
|
+
|
161
164
|
error: ToolCallError | None = Field(default=None)
|
162
165
|
"""Error which occurred during tool call."""
|
163
166
|
|
inspect_ai/model/_model.py
CHANGED
@@ -454,6 +454,7 @@ class Model:
|
|
454
454
|
async def generate() -> ModelOutput:
|
455
455
|
check_sample_interrupt()
|
456
456
|
|
457
|
+
cache_entry: CacheEntry | None
|
457
458
|
if cache:
|
458
459
|
if isinstance(cache, CachePolicy):
|
459
460
|
policy = cache
|
@@ -481,6 +482,8 @@ class Model:
|
|
481
482
|
call=None,
|
482
483
|
)
|
483
484
|
return existing
|
485
|
+
else:
|
486
|
+
cache_entry = None
|
484
487
|
|
485
488
|
# verify that model apis are allowed
|
486
489
|
self.verify_model_apis()
|
@@ -550,7 +553,7 @@ class Model:
|
|
550
553
|
json.dumps(dict(model=str(self), usage=output.usage.model_dump())),
|
551
554
|
)
|
552
555
|
|
553
|
-
if cache:
|
556
|
+
if cache and cache_entry:
|
554
557
|
cache_store(entry=cache_entry, output=output)
|
555
558
|
|
556
559
|
return output
|
@@ -1112,6 +1115,7 @@ def tool_result_images_reducer(
|
|
1112
1115
|
content=edited_tool_message_content,
|
1113
1116
|
tool_call_id=message.tool_call_id,
|
1114
1117
|
function=message.function,
|
1118
|
+
internal_name=message.internal_name,
|
1115
1119
|
)
|
1116
1120
|
],
|
1117
1121
|
pending_content + new_user_message_content,
|
@@ -188,8 +188,10 @@ class ModelOutput(BaseModel):
|
|
188
188
|
model: str,
|
189
189
|
tool_name: str,
|
190
190
|
tool_arguments: dict[str, Any],
|
191
|
+
internal_tool_name: str | None = None,
|
191
192
|
tool_call_id: str | None = None,
|
192
193
|
content: str | None = None,
|
194
|
+
type: str = "function",
|
193
195
|
) -> "ModelOutput":
|
194
196
|
"""
|
195
197
|
Returns a ModelOutput for requesting a tool call.
|
@@ -197,6 +199,8 @@ class ModelOutput(BaseModel):
|
|
197
199
|
Args:
|
198
200
|
model: model name
|
199
201
|
tool_name: The name of the tool.
|
202
|
+
internal_tool_name: The model's internal name for the tool (if any).
|
203
|
+
type: The model's type for the tool. e.g. "function", "computer_use_preview"
|
200
204
|
tool_arguments: The arguments passed to the tool.
|
201
205
|
tool_call_id: Optional ID for the tool call. Defaults to a random UUID.
|
202
206
|
content: Optional content to include in the message. Defaults to "tool call for tool {tool_name}".
|
@@ -221,8 +225,9 @@ class ModelOutput(BaseModel):
|
|
221
225
|
ToolCall(
|
222
226
|
id=tool_call_id,
|
223
227
|
function=tool_name,
|
228
|
+
internal_name=internal_tool_name,
|
224
229
|
arguments=tool_arguments,
|
225
|
-
type=
|
230
|
+
type=type,
|
226
231
|
)
|
227
232
|
],
|
228
233
|
),
|