inspect-ai 0.3.75__py3-none-any.whl → 0.3.77__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. inspect_ai/_cli/eval.py +16 -0
  2. inspect_ai/_display/core/results.py +6 -1
  3. inspect_ai/_eval/eval.py +8 -1
  4. inspect_ai/_eval/evalset.py +6 -2
  5. inspect_ai/_eval/registry.py +3 -5
  6. inspect_ai/_eval/run.py +7 -2
  7. inspect_ai/_eval/task/run.py +4 -0
  8. inspect_ai/_util/content.py +3 -0
  9. inspect_ai/_util/logger.py +3 -0
  10. inspect_ai/_view/www/dist/assets/index.css +28 -16
  11. inspect_ai/_view/www/dist/assets/index.js +4811 -4609
  12. inspect_ai/_view/www/log-schema.json +79 -9
  13. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +22 -4
  14. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +1 -1
  15. inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +1 -1
  16. inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +2 -2
  17. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +1 -1
  18. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +2 -2
  19. inspect_ai/_view/www/src/types/log.d.ts +11 -5
  20. inspect_ai/log/_recorders/json.py +8 -0
  21. inspect_ai/log/_transcript.py +13 -4
  22. inspect_ai/model/_call_tools.py +13 -4
  23. inspect_ai/model/_chat_message.py +3 -0
  24. inspect_ai/model/_model.py +5 -1
  25. inspect_ai/model/_model_output.py +6 -1
  26. inspect_ai/model/_openai.py +78 -10
  27. inspect_ai/model/_openai_responses.py +277 -0
  28. inspect_ai/model/_providers/anthropic.py +134 -75
  29. inspect_ai/model/_providers/azureai.py +2 -2
  30. inspect_ai/model/_providers/mistral.py +29 -13
  31. inspect_ai/model/_providers/openai.py +64 -57
  32. inspect_ai/model/_providers/openai_responses.py +177 -0
  33. inspect_ai/model/_providers/openrouter.py +52 -2
  34. inspect_ai/model/_providers/providers.py +1 -1
  35. inspect_ai/model/_providers/vertex.py +5 -2
  36. inspect_ai/tool/__init__.py +6 -0
  37. inspect_ai/tool/_tool.py +23 -3
  38. inspect_ai/tool/_tool_call.py +5 -2
  39. inspect_ai/tool/_tool_support_helpers.py +200 -0
  40. inspect_ai/tool/_tools/_bash_session.py +119 -0
  41. inspect_ai/tool/_tools/_computer/_computer.py +1 -1
  42. inspect_ai/tool/_tools/_text_editor.py +121 -0
  43. inspect_ai/tool/_tools/_think.py +48 -0
  44. inspect_ai/tool/_tools/_web_browser/_back_compat.py +150 -0
  45. inspect_ai/tool/_tools/_web_browser/_web_browser.py +75 -130
  46. inspect_ai/tool/_tools/_web_search.py +1 -1
  47. inspect_ai/util/_json.py +28 -0
  48. inspect_ai/util/_sandbox/context.py +16 -7
  49. inspect_ai/util/_sandbox/docker/config.py +1 -1
  50. inspect_ai/util/_sandbox/docker/internal.py +3 -3
  51. {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.77.dist-info}/METADATA +5 -2
  52. {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.77.dist-info}/RECORD +56 -80
  53. {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.77.dist-info}/WHEEL +1 -1
  54. inspect_ai/model/_image.py +0 -15
  55. inspect_ai/tool/_tools/_web_browser/_resources/.pylintrc +0 -8
  56. inspect_ai/tool/_tools/_web_browser/_resources/.vscode/launch.json +0 -24
  57. inspect_ai/tool/_tools/_web_browser/_resources/.vscode/settings.json +0 -25
  58. inspect_ai/tool/_tools/_web_browser/_resources/Dockerfile +0 -22
  59. inspect_ai/tool/_tools/_web_browser/_resources/README.md +0 -63
  60. inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree.py +0 -71
  61. inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree_node.py +0 -323
  62. inspect_ai/tool/_tools/_web_browser/_resources/cdp/__init__.py +0 -5
  63. inspect_ai/tool/_tools/_web_browser/_resources/cdp/a11y.py +0 -279
  64. inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom.py +0 -9
  65. inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom_snapshot.py +0 -293
  66. inspect_ai/tool/_tools/_web_browser/_resources/cdp/page.py +0 -94
  67. inspect_ai/tool/_tools/_web_browser/_resources/constants.py +0 -2
  68. inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.svg +0 -2
  69. inspect_ai/tool/_tools/_web_browser/_resources/mock_environment.py +0 -45
  70. inspect_ai/tool/_tools/_web_browser/_resources/playwright_browser.py +0 -50
  71. inspect_ai/tool/_tools/_web_browser/_resources/playwright_crawler.py +0 -48
  72. inspect_ai/tool/_tools/_web_browser/_resources/playwright_page_crawler.py +0 -280
  73. inspect_ai/tool/_tools/_web_browser/_resources/pyproject.toml +0 -65
  74. inspect_ai/tool/_tools/_web_browser/_resources/rectangle.py +0 -64
  75. inspect_ai/tool/_tools/_web_browser/_resources/rpc_client_helpers.py +0 -146
  76. inspect_ai/tool/_tools/_web_browser/_resources/scale_factor.py +0 -64
  77. inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_tree_node.py +0 -180
  78. inspect_ai/tool/_tools/_web_browser/_resources/test_playwright_crawler.py +0 -99
  79. inspect_ai/tool/_tools/_web_browser/_resources/test_rectangle.py +0 -15
  80. inspect_ai/tool/_tools/_web_browser/_resources/test_web_client.py +0 -44
  81. inspect_ai/tool/_tools/_web_browser/_resources/web_browser_rpc_types.py +0 -39
  82. inspect_ai/tool/_tools/_web_browser/_resources/web_client.py +0 -214
  83. inspect_ai/tool/_tools/_web_browser/_resources/web_client_new_session.py +0 -35
  84. inspect_ai/tool/_tools/_web_browser/_resources/web_server.py +0 -192
  85. {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.77.dist-info}/entry_points.txt +0 -0
  86. {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.77.dist-info/licenses}/LICENSE +0 -0
  87. {inspect_ai-0.3.75.dist-info → inspect_ai-0.3.77.dist-info}/top_level.txt +0 -0
@@ -200,8 +200,16 @@
200
200
  "description": "Assistant chat message.",
201
201
  "properties": {
202
202
  "id": {
203
- "title": "Id",
204
- "type": "string"
203
+ "anyOf": [
204
+ {
205
+ "type": "string"
206
+ },
207
+ {
208
+ "type": "null"
209
+ }
210
+ ],
211
+ "default": null,
212
+ "title": "Id"
205
213
  },
206
214
  "content": {
207
215
  "anyOf": [
@@ -286,8 +294,16 @@
286
294
  "description": "System chat message.",
287
295
  "properties": {
288
296
  "id": {
289
- "title": "Id",
290
- "type": "string"
297
+ "anyOf": [
298
+ {
299
+ "type": "string"
300
+ },
301
+ {
302
+ "type": "null"
303
+ }
304
+ ],
305
+ "default": null,
306
+ "title": "Id"
291
307
  },
292
308
  "content": {
293
309
  "anyOf": [
@@ -356,8 +372,16 @@
356
372
  "description": "Tool chat message.",
357
373
  "properties": {
358
374
  "id": {
359
- "title": "Id",
360
- "type": "string"
375
+ "anyOf": [
376
+ {
377
+ "type": "string"
378
+ },
379
+ {
380
+ "type": "null"
381
+ }
382
+ ],
383
+ "default": null,
384
+ "title": "Id"
361
385
  },
362
386
  "content": {
363
387
  "anyOf": [
@@ -435,6 +459,18 @@
435
459
  "default": null,
436
460
  "title": "Function"
437
461
  },
462
+ "internal_name": {
463
+ "anyOf": [
464
+ {
465
+ "type": "string"
466
+ },
467
+ {
468
+ "type": "null"
469
+ }
470
+ ],
471
+ "default": null,
472
+ "title": "Internal Name"
473
+ },
438
474
  "error": {
439
475
  "anyOf": [
440
476
  {
@@ -454,6 +490,7 @@
454
490
  "role",
455
491
  "tool_call_id",
456
492
  "function",
493
+ "internal_name",
457
494
  "error"
458
495
  ],
459
496
  "title": "ChatMessageTool",
@@ -464,8 +501,16 @@
464
501
  "description": "User chat message.",
465
502
  "properties": {
466
503
  "id": {
467
- "title": "Id",
468
- "type": "string"
504
+ "anyOf": [
505
+ {
506
+ "type": "string"
507
+ },
508
+ {
509
+ "type": "null"
510
+ }
511
+ ],
512
+ "default": null,
513
+ "title": "Id"
469
514
  },
470
515
  "content": {
471
516
  "anyOf": [
@@ -4431,10 +4476,21 @@
4431
4476
  "type": "object"
4432
4477
  },
4433
4478
  "type": {
4434
- "const": "function",
4435
4479
  "title": "Type",
4436
4480
  "type": "string"
4437
4481
  },
4482
+ "internal_name": {
4483
+ "anyOf": [
4484
+ {
4485
+ "type": "string"
4486
+ },
4487
+ {
4488
+ "type": "null"
4489
+ }
4490
+ ],
4491
+ "default": null,
4492
+ "title": "Internal Name"
4493
+ },
4438
4494
  "parse_error": {
4439
4495
  "anyOf": [
4440
4496
  {
@@ -4464,6 +4520,7 @@
4464
4520
  "function",
4465
4521
  "arguments",
4466
4522
  "type",
4523
+ "internal_name",
4467
4524
  "parse_error",
4468
4525
  "view"
4469
4526
  ],
@@ -4623,6 +4680,18 @@
4623
4680
  "title": "Arguments",
4624
4681
  "type": "object"
4625
4682
  },
4683
+ "internal_name": {
4684
+ "anyOf": [
4685
+ {
4686
+ "type": "string"
4687
+ },
4688
+ {
4689
+ "type": "null"
4690
+ }
4691
+ ],
4692
+ "default": null,
4693
+ "title": "Internal Name"
4694
+ },
4626
4695
  "view": {
4627
4696
  "anyOf": [
4628
4697
  {
@@ -4809,6 +4878,7 @@
4809
4878
  "id",
4810
4879
  "function",
4811
4880
  "arguments",
4881
+ "internal_name",
4812
4882
  "view",
4813
4883
  "result",
4814
4884
  "truncated",
@@ -83,8 +83,24 @@ export const ToolCallView: FC<ToolCallViewProps> = ({
83
83
  : !isContentImage(output);
84
84
  const normalizedContent = useMemo(() => normalizeContent(output), [output]);
85
85
 
86
- const contents = mode !== "compact" ? input : input || functionCall;
86
+ const hasContent = normalizedContent.find((c) => {
87
+ if (c.type === "tool") {
88
+ for (const t of c.content) {
89
+ if (t.type === "text") {
90
+ if (t.text) {
91
+ return true;
92
+ }
93
+ } else {
94
+ return true;
95
+ }
96
+ }
97
+ return false;
98
+ } else {
99
+ return true;
100
+ }
101
+ });
87
102
 
103
+ const contents = mode !== "compact" ? input : input || functionCall;
88
104
  return (
89
105
  <div>
90
106
  {mode !== "compact" && (!view || view.title) ? (
@@ -99,9 +115,11 @@ export const ToolCallView: FC<ToolCallViewProps> = ({
99
115
  contents={contents}
100
116
  toolCallView={view}
101
117
  />
102
- <ExpandablePanel collapse={collapse} border={true} lines={15}>
103
- <MessageContent contents={normalizedContent} />
104
- </ExpandablePanel>
118
+ {hasContent ? (
119
+ <ExpandablePanel collapse={collapse} border={true} lines={15}>
120
+ <MessageContent contents={normalizedContent} />
121
+ </ExpandablePanel>
122
+ ) : undefined}
105
123
  </div>
106
124
  </div>
107
125
  </div>
@@ -64,7 +64,7 @@ export const ToolInput: FC<ToolInputProps> = memo((props) => {
64
64
  className={clsx(
65
65
  "source-code",
66
66
  "sourceCode",
67
- `language-${highlightLanguage}`,
67
+ highlightLanguage ? `language-${highlightLanguage}` : undefined,
68
68
  styles.outputCode,
69
69
  )}
70
70
  >
@@ -9,7 +9,7 @@ export const categoricalScoreDescriptor = (
9
9
  scoreType: kScoreTypeCategorical,
10
10
  categories: values,
11
11
  compare: (a, b) => {
12
- return String(a).localeCompare(String(b));
12
+ return String(a.value).localeCompare(String(b.value));
13
13
  },
14
14
  render: (score) => {
15
15
  return String(score);
@@ -13,8 +13,8 @@ export const numericScoreDescriptor = (values: Value2[]): ScoreDescriptor => {
13
13
  min: Math.min(...onlyNumeric),
14
14
  max: Math.max(...onlyNumeric),
15
15
  compare: (a, b) => {
16
- if (typeof a === "number" && typeof b === "number") {
17
- return a - b;
16
+ if (typeof a.value === "number" && typeof b.value === "number") {
17
+ return a.value - b.value;
18
18
  } else {
19
19
  console.warn("Comparing non-numerics using a numeric score descriptor");
20
20
  return 0;
@@ -165,8 +165,8 @@ export const sortSamples = (
165
165
  }
166
166
 
167
167
  return samplesDescriptor.selectedScoreDescriptor.compare(
168
- aScore,
169
168
  bScore,
169
+ aScore,
170
170
  );
171
171
  }
172
172
  default:
@@ -31,8 +31,8 @@
31
31
  }
32
32
 
33
33
  .code {
34
- white-space: pre-wrap;
35
- word-wrap: anywhere;
34
+ white-space: pre-wrap !important;
35
+ word-wrap: anywhere !important;
36
36
  }
37
37
 
38
38
  .toolConfig {
@@ -148,7 +148,7 @@ export type Input =
148
148
  | ChatMessageAssistant
149
149
  | ChatMessageTool
150
150
  )[];
151
- export type Id1 = string;
151
+ export type Id1 = string | null;
152
152
  export type Content =
153
153
  | string
154
154
  | (
@@ -175,7 +175,7 @@ export type Video = string;
175
175
  export type Format1 = "mp4" | "mpeg" | "mov";
176
176
  export type Source = ("input" | "generate") | null;
177
177
  export type Role = "system";
178
- export type Id2 = string;
178
+ export type Id2 = string | null;
179
179
  export type Content1 =
180
180
  | string
181
181
  | (
@@ -188,7 +188,7 @@ export type Content1 =
188
188
  export type Source1 = ("input" | "generate") | null;
189
189
  export type Role1 = "user";
190
190
  export type ToolCallId = string[] | null;
191
- export type Id3 = string;
191
+ export type Id3 = string | null;
192
192
  export type Content2 =
193
193
  | string
194
194
  | (
@@ -203,12 +203,13 @@ export type Role2 = "assistant";
203
203
  export type ToolCalls = ToolCall[] | null;
204
204
  export type Id4 = string;
205
205
  export type Function = string;
206
- export type Type8 = "function";
206
+ export type Type8 = string;
207
+ export type InternalName = string | null;
207
208
  export type ParseError = string | null;
208
209
  export type Title = string | null;
209
210
  export type Format2 = "text" | "markdown";
210
211
  export type Content3 = string;
211
- export type Id5 = string;
212
+ export type Id5 = string | null;
212
213
  export type Content4 =
213
214
  | string
214
215
  | (
@@ -222,6 +223,7 @@ export type Source3 = ("input" | "generate") | null;
222
223
  export type Role3 = "tool";
223
224
  export type ToolCallId1 = string | null;
224
225
  export type Function1 = string | null;
226
+ export type InternalName1 = string | null;
225
227
  export type Type9 =
226
228
  | "parsing"
227
229
  | "timeout"
@@ -369,6 +371,7 @@ export type Event6 = "tool";
369
371
  export type Type12 = "function";
370
372
  export type Id7 = string;
371
373
  export type Function2 = string;
374
+ export type InternalName2 = string | null;
372
375
  export type Result1 =
373
376
  | string
374
377
  | number
@@ -911,6 +914,7 @@ export interface ToolCall {
911
914
  function: Function;
912
915
  arguments: Arguments;
913
916
  type: Type8;
917
+ internal_name: InternalName;
914
918
  parse_error: ParseError;
915
919
  view: ToolCallContent | null;
916
920
  }
@@ -933,6 +937,7 @@ export interface ChatMessageTool {
933
937
  role: Role3;
934
938
  tool_call_id: ToolCallId1;
935
939
  function: Function1;
940
+ internal_name: InternalName1;
936
941
  error: ToolCallError | null;
937
942
  }
938
943
  export interface ToolCallError {
@@ -1201,6 +1206,7 @@ export interface ToolEvent {
1201
1206
  id: Id7;
1202
1207
  function: Function2;
1203
1208
  arguments: Arguments1;
1209
+ internal_name: InternalName2;
1204
1210
  view: ToolCallContent | null;
1205
1211
  result: Result1;
1206
1212
  truncated: Truncated;
@@ -217,6 +217,11 @@ def _read_header_streaming(log_file: str) -> EvalLog:
217
217
 
218
218
  # Parse the log file, stopping before parsing samples
219
219
  status: Literal["started", "success", "cancelled", "error"] | None = None
220
+ eval: EvalSpec | None = None
221
+ plan: EvalPlan | None = None
222
+ results: EvalResults | None = None
223
+ stats: EvalStats | None = None
224
+ error: EvalError | None = None
220
225
  for k, v in ijson.kvitems(f, ""):
221
226
  if k == "status":
222
227
  assert v in get_args(
@@ -239,6 +244,9 @@ def _read_header_streaming(log_file: str) -> EvalLog:
239
244
  break
240
245
 
241
246
  assert status, "Must encounter a 'status'"
247
+ assert eval, "Must encounter a 'eval'"
248
+ assert plan, "Must encounter a 'plan'"
249
+ assert stats, "Must encounter a 'stats'"
242
250
 
243
251
  return EvalLog(
244
252
  eval=eval,
@@ -146,7 +146,7 @@ class ModelEvent(BaseEvent):
146
146
  """working time for model call that succeeded (i.e. was not retried)."""
147
147
 
148
148
  @field_serializer("completed")
149
- def serialize_completed(self, dt: datetime) -> str:
149
+ def serialize_completed(self, dt: datetime | None) -> str | None:
150
150
  if dt is None:
151
151
  return None
152
152
  return dt.astimezone().isoformat()
@@ -170,6 +170,9 @@ class ToolEvent(BaseEvent):
170
170
  arguments: dict[str, JsonValue]
171
171
  """Arguments to function."""
172
172
 
173
+ internal_name: str | None = Field(default=None)
174
+ """Internal name for tool (if any)."""
175
+
173
176
  view: ToolCallContent | None = Field(default=None)
174
177
  """Custom view of tool call input."""
175
178
 
@@ -235,7 +238,9 @@ class ToolEvent(BaseEvent):
235
238
  """Required so that we can include '_cancel_fn' as a member."""
236
239
 
237
240
  @field_serializer("completed")
238
- def serialize_completed(self, dt: datetime) -> str:
241
+ def serialize_completed(self, dt: datetime | None) -> str | None:
242
+ if dt is None:
243
+ return None
239
244
  return dt.astimezone().isoformat()
240
245
 
241
246
 
@@ -270,7 +275,9 @@ class SandboxEvent(BaseEvent):
270
275
  """Time that sandbox action completed (see `timestamp` for started)"""
271
276
 
272
277
  @field_serializer("completed")
273
- def serialize_completed(self, dt: datetime) -> str:
278
+ def serialize_completed(self, dt: datetime | None) -> str | None:
279
+ if dt is None:
280
+ return None
274
281
  return dt.astimezone().isoformat()
275
282
 
276
283
 
@@ -412,7 +419,9 @@ class SubtaskEvent(BaseEvent):
412
419
  """Working time for subtask (i.e. time not spent waiting on semaphores or model retries)."""
413
420
 
414
421
  @field_serializer("completed")
415
- def serialize_completed(self, dt: datetime) -> str:
422
+ def serialize_completed(self, dt: datetime | None) -> str | None:
423
+ if dt is None:
424
+ return None
416
425
  return dt.astimezone().isoformat()
417
426
 
418
427
 
@@ -25,7 +25,6 @@ from typing import (
25
25
  if sys.version_info < (3, 11):
26
26
  from exceptiongroup import ExceptionGroup
27
27
 
28
-
29
28
  import anyio
30
29
  import yaml
31
30
  from anyio.streams.memory import MemoryObjectSendStream
@@ -168,6 +167,7 @@ async def call_tools(
168
167
  id=call.id,
169
168
  function=call.function,
170
169
  arguments=call.arguments,
170
+ internal_name=call.internal_name,
171
171
  result=content,
172
172
  truncated=truncated,
173
173
  view=call.view,
@@ -183,6 +183,7 @@ async def call_tools(
183
183
  content=content,
184
184
  tool_call_id=call.id,
185
185
  function=call.function,
186
+ internal_name=call.internal_name,
186
187
  error=tool_error,
187
188
  ),
188
189
  event,
@@ -201,6 +202,7 @@ async def call_tools(
201
202
  id=call.id,
202
203
  function=call.function,
203
204
  arguments=call.arguments,
205
+ internal_name=call.internal_name,
204
206
  view=call.view,
205
207
  pending=True,
206
208
  )
@@ -216,9 +218,7 @@ async def call_tools(
216
218
  tg.start_soon(call_tool_task, call, send_stream)
217
219
  event._set_cancel_fn(tg.cancel_scope.cancel)
218
220
  async with receive_stream:
219
- async for result in receive_stream:
220
- tool_message, result_event = result
221
- break
221
+ tool_message, result_event = await receive_stream.receive()
222
222
  except ExceptionGroup as ex:
223
223
  raise ex.exceptions[0]
224
224
 
@@ -226,6 +226,7 @@ async def call_tools(
226
226
  tool_message = ChatMessageTool(
227
227
  content="",
228
228
  function=call.function,
229
+ internal_name=call.internal_name,
229
230
  tool_call_id=call.id,
230
231
  error=ToolCallError(
231
232
  "timeout", "Command timed out before completing."
@@ -235,6 +236,7 @@ async def call_tools(
235
236
  id=call.id,
236
237
  function=call.function,
237
238
  arguments=call.arguments,
239
+ internal_name=call.internal_name,
238
240
  result=tool_message.content,
239
241
  truncated=None,
240
242
  view=call.view,
@@ -508,6 +510,13 @@ def tool_parse_error_message(arguments: str, ex: Exception) -> str:
508
510
  def parse_tool_call(
509
511
  id: str, function: str, arguments: str, tools: list[ToolInfo] | None = None
510
512
  ) -> ToolCall:
513
+ """Parse a tool call from a JSON payload.
514
+
515
+ Note that this function doesn't know about internal tool names so the caller
516
+ should amend the returned `ToolCall` by mapping the parsed `function` field from
517
+ an internal name to an inspect tool name and fixing up the `ToolCall` object
518
+ as required to reflect this change.
519
+ """
511
520
  error: str | None = None
512
521
  arguments_dict: dict[str, Any] = {}
513
522
 
@@ -158,6 +158,9 @@ class ChatMessageTool(ChatMessageBase):
158
158
  function: str | None = Field(default=None)
159
159
  """Name of function called."""
160
160
 
161
+ internal_name: str | None = Field(default=None)
162
+ """Internal name for tool (if any)."""
163
+
161
164
  error: ToolCallError | None = Field(default=None)
162
165
  """Error which occurred during tool call."""
163
166
 
@@ -454,6 +454,7 @@ class Model:
454
454
  async def generate() -> ModelOutput:
455
455
  check_sample_interrupt()
456
456
 
457
+ cache_entry: CacheEntry | None
457
458
  if cache:
458
459
  if isinstance(cache, CachePolicy):
459
460
  policy = cache
@@ -481,6 +482,8 @@ class Model:
481
482
  call=None,
482
483
  )
483
484
  return existing
485
+ else:
486
+ cache_entry = None
484
487
 
485
488
  # verify that model apis are allowed
486
489
  self.verify_model_apis()
@@ -550,7 +553,7 @@ class Model:
550
553
  json.dumps(dict(model=str(self), usage=output.usage.model_dump())),
551
554
  )
552
555
 
553
- if cache:
556
+ if cache and cache_entry:
554
557
  cache_store(entry=cache_entry, output=output)
555
558
 
556
559
  return output
@@ -1112,6 +1115,7 @@ def tool_result_images_reducer(
1112
1115
  content=edited_tool_message_content,
1113
1116
  tool_call_id=message.tool_call_id,
1114
1117
  function=message.function,
1118
+ internal_name=message.internal_name,
1115
1119
  )
1116
1120
  ],
1117
1121
  pending_content + new_user_message_content,
@@ -188,8 +188,10 @@ class ModelOutput(BaseModel):
188
188
  model: str,
189
189
  tool_name: str,
190
190
  tool_arguments: dict[str, Any],
191
+ internal_tool_name: str | None = None,
191
192
  tool_call_id: str | None = None,
192
193
  content: str | None = None,
194
+ type: str = "function",
193
195
  ) -> "ModelOutput":
194
196
  """
195
197
  Returns a ModelOutput for requesting a tool call.
@@ -197,6 +199,8 @@ class ModelOutput(BaseModel):
197
199
  Args:
198
200
  model: model name
199
201
  tool_name: The name of the tool.
202
+ internal_tool_name: The model's internal name for the tool (if any).
203
+ type: The model's type for the tool. e.g. "function", "computer_use_preview"
200
204
  tool_arguments: The arguments passed to the tool.
201
205
  tool_call_id: Optional ID for the tool call. Defaults to a random UUID.
202
206
  content: Optional content to include in the message. Defaults to "tool call for tool {tool_name}".
@@ -221,8 +225,9 @@ class ModelOutput(BaseModel):
221
225
  ToolCall(
222
226
  id=tool_call_id,
223
227
  function=tool_name,
228
+ internal_name=internal_tool_name,
224
229
  arguments=tool_arguments,
225
- type="function",
230
+ type=type,
226
231
  )
227
232
  ],
228
233
  ),