pydantic-ai-slim 1.0.14__py3-none-any.whl → 1.0.15__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of pydantic-ai-slim has been flagged as possibly problematic.

Files changed (38)
  1. pydantic_ai/__init__.py +19 -1
  2. pydantic_ai/_agent_graph.py +116 -93
  3. pydantic_ai/_cli.py +4 -7
  4. pydantic_ai/_output.py +236 -192
  5. pydantic_ai/_parts_manager.py +8 -42
  6. pydantic_ai/_tool_manager.py +9 -16
  7. pydantic_ai/agent/abstract.py +169 -1
  8. pydantic_ai/builtin_tools.py +82 -0
  9. pydantic_ai/direct.py +7 -0
  10. pydantic_ai/durable_exec/dbos/_agent.py +106 -3
  11. pydantic_ai/durable_exec/temporal/_agent.py +123 -6
  12. pydantic_ai/durable_exec/temporal/_model.py +8 -0
  13. pydantic_ai/format_prompt.py +4 -3
  14. pydantic_ai/mcp.py +20 -10
  15. pydantic_ai/messages.py +149 -3
  16. pydantic_ai/models/__init__.py +15 -1
  17. pydantic_ai/models/anthropic.py +7 -3
  18. pydantic_ai/models/cohere.py +4 -0
  19. pydantic_ai/models/function.py +7 -4
  20. pydantic_ai/models/gemini.py +8 -0
  21. pydantic_ai/models/google.py +56 -23
  22. pydantic_ai/models/groq.py +11 -5
  23. pydantic_ai/models/huggingface.py +5 -3
  24. pydantic_ai/models/mistral.py +6 -8
  25. pydantic_ai/models/openai.py +197 -57
  26. pydantic_ai/models/test.py +4 -0
  27. pydantic_ai/output.py +5 -2
  28. pydantic_ai/profiles/__init__.py +2 -0
  29. pydantic_ai/profiles/google.py +5 -2
  30. pydantic_ai/profiles/openai.py +2 -1
  31. pydantic_ai/result.py +46 -30
  32. pydantic_ai/run.py +35 -7
  33. pydantic_ai/usage.py +5 -4
  34. {pydantic_ai_slim-1.0.14.dist-info → pydantic_ai_slim-1.0.15.dist-info}/METADATA +3 -3
  35. {pydantic_ai_slim-1.0.14.dist-info → pydantic_ai_slim-1.0.15.dist-info}/RECORD +38 -38
  36. {pydantic_ai_slim-1.0.14.dist-info → pydantic_ai_slim-1.0.15.dist-info}/WHEEL +0 -0
  37. {pydantic_ai_slim-1.0.14.dist-info → pydantic_ai_slim-1.0.15.dist-info}/entry_points.txt +0 -0
  38. {pydantic_ai_slim-1.0.14.dist-info → pydantic_ai_slim-1.0.15.dist-info}/licenses/LICENSE +0 -0
pydantic_ai/models/openai.py CHANGED
@@ -17,14 +17,16 @@ from .._output import DEFAULT_OUTPUT_TOOL_NAME, OutputObjectDefinition
  from .._run_context import RunContext
  from .._thinking_part import split_content_into_text_and_thinking
  from .._utils import guard_tool_call_id as _guard_tool_call_id, now_utc as _now_utc, number_to_datetime
- from ..builtin_tools import CodeExecutionTool, WebSearchTool
+ from ..builtin_tools import CodeExecutionTool, ImageGenerationTool, WebSearchTool
  from ..exceptions import UserError
  from ..messages import (
      AudioUrl,
      BinaryContent,
+     BinaryImage,
      BuiltinToolCallPart,
      BuiltinToolReturnPart,
      DocumentUrl,
+     FilePart,
      FinishReason,
      ImageUrl,
      ModelMessage,
@@ -678,6 +680,9 @@ class OpenAIChatModel(Model):
                      # OpenAI doesn't return built-in tool calls
                      elif isinstance(item, BuiltinToolCallPart | BuiltinToolReturnPart):  # pragma: no cover
                          pass
+                     elif isinstance(item, FilePart):  # pragma: no cover
+                         # Files generated by models are not sent back to models that don't themselves generate files.
+                         pass
                      else:
                          assert_never(item)
                  message_param = chat.ChatCompletionAssistantMessageParam(role='assistant')
@@ -782,31 +787,27 @@ class OpenAIChatModel(Model):
                              identifier=item.identifier,
                          )
                      )
-                 else:
-                     base64_encoded = base64.b64encode(item.data).decode('utf-8')
-                     if item.is_image:
-                         image_url: ImageURL = {'url': f'data:{item.media_type};base64,{base64_encoded}'}
-                         if metadata := item.vendor_metadata:
-                             image_url['detail'] = metadata.get('detail', 'auto')
-                         content.append(ChatCompletionContentPartImageParam(image_url=image_url, type='image_url'))
-                     elif item.is_audio:
-                         assert item.format in ('wav', 'mp3')
-                         audio = InputAudio(data=base64_encoded, format=item.format)
-                         content.append(
-                             ChatCompletionContentPartInputAudioParam(input_audio=audio, type='input_audio')
-                         )
-                     elif item.is_document:
-                         content.append(
-                             File(
-                                 file=FileFile(
-                                     file_data=f'data:{item.media_type};base64,{base64_encoded}',
-                                     filename=f'filename.{item.format}',
-                                 ),
-                                 type='file',
-                             )
+                 elif item.is_image:
+                     image_url = ImageURL(url=item.data_uri)
+                     if metadata := item.vendor_metadata:
+                         image_url['detail'] = metadata.get('detail', 'auto')
+                     content.append(ChatCompletionContentPartImageParam(image_url=image_url, type='image_url'))
+                 elif item.is_audio:
+                     assert item.format in ('wav', 'mp3')
+                     audio = InputAudio(data=base64.b64encode(item.data).decode('utf-8'), format=item.format)
+                     content.append(ChatCompletionContentPartInputAudioParam(input_audio=audio, type='input_audio'))
+                 elif item.is_document:
+                     content.append(
+                         File(
+                             file=FileFile(
+                                 file_data=item.data_uri,
+                                 filename=f'filename.{item.format}',
+                             ),
+                             type='file',
                          )
-                 else:  # pragma: no cover
-                     raise RuntimeError(f'Unsupported binary content type: {item.media_type}')
+                     )
+                 else:  # pragma: no cover
+                     raise RuntimeError(f'Unsupported binary content type: {item.media_type}')
              elif isinstance(item, AudioUrl):
                  downloaded_item = await download_item(item, data_format='base64', type_format='extension')
                  assert downloaded_item['data_type'] in (
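
Note: the rewritten branches above replace the inline `base64.b64encode(...)` boilerplate with a `data_uri` property on `BinaryContent`. As a rough sketch of what such a property computes (not the library's actual implementation), a base64 data URI can be built like this:

    import base64

    def data_uri(data: bytes, media_type: str) -> str:
        """Encode raw bytes as an RFC 2397 data URI, e.g. 'data:image/png;base64,...'."""
        return f'data:{media_type};base64,{base64.b64encode(data).decode("utf-8")}'

    assert data_uri(b'hello', 'text/plain') == 'data:text/plain;base64,aGVsbG8='
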
@@ -941,7 +942,7 @@ class OpenAIResponsesModel(Model):
          response = await self._responses_create(
              messages, False, cast(OpenAIResponsesModelSettings, model_settings or {}), model_request_parameters
          )
-         return self._process_response(response)
+         return self._process_response(response, model_request_parameters)

      @asynccontextmanager
      async def request_stream(
@@ -962,7 +963,9 @@ class OpenAIResponsesModel(Model):
          async with response:
              yield await self._process_streamed_response(response, model_request_parameters)

-     def _process_response(self, response: responses.Response) -> ModelResponse:  # noqa: C901
+     def _process_response(  # noqa: C901
+         self, response: responses.Response, model_request_parameters: ModelRequestParameters
+     ) -> ModelResponse:
          """Process a non-streamed response, and prepare a message to return."""
          timestamp = number_to_datetime(response.created_at)
          items: list[ModelResponsePart] = []
@@ -1002,19 +1005,24 @@ class OpenAIResponsesModel(Model):
                  ToolCallPart(item.name, item.arguments, tool_call_id=_combine_tool_call_ids(item.call_id, item.id))
              )
          elif isinstance(item, responses.ResponseCodeInterpreterToolCall):
-             call_part, return_part = _map_code_interpreter_tool_call(item, self.system)
+             call_part, return_part, file_parts = _map_code_interpreter_tool_call(item, self.system)
              items.append(call_part)
+             if file_parts:
+                 items.extend(file_parts)
              items.append(return_part)
          elif isinstance(item, responses.ResponseFunctionWebSearch):
              call_part, return_part = _map_web_search_tool_call(item, self.system)
              items.append(call_part)
              items.append(return_part)
+         elif isinstance(item, responses.response_output_item.ImageGenerationCall):
+             call_part, return_part, file_part = _map_image_generation_tool_call(item, self.system)
+             items.append(call_part)
+             if file_part:  # pragma: no branch
+                 items.append(file_part)
+             items.append(return_part)
          elif isinstance(item, responses.ResponseComputerToolCall):  # pragma: no cover
              # Pydantic AI doesn't yet support the ComputerUse built-in tool
              pass
-         elif isinstance(item, responses.response_output_item.ImageGenerationCall):  # pragma: no cover
-             # Pydantic AI doesn't yet support the ImageGeneration built-in tool
-             pass
          elif isinstance(item, responses.ResponseCustomToolCall):  # pragma: no cover
              # Support is being implemented in https://github.com/pydantic/pydantic-ai/pull/2572
              pass
@@ -1204,6 +1212,7 @@ class OpenAIResponsesModel(Model):

      def _get_builtin_tools(self, model_request_parameters: ModelRequestParameters) -> list[responses.ToolParam]:
          tools: list[responses.ToolParam] = []
+         has_image_generating_tool = False
          for tool in model_request_parameters.builtin_tools:
              if isinstance(tool, WebSearchTool):
                  web_search_tool = responses.WebSearchToolParam(
@@ -1214,12 +1223,31 @@ class OpenAIResponsesModel(Model):
                          type='approximate', **tool.user_location
                      )
                  tools.append(web_search_tool)
-             elif isinstance(tool, CodeExecutionTool):  # pragma: no branch
+             elif isinstance(tool, CodeExecutionTool):
+                 has_image_generating_tool = True
                  tools.append({'type': 'code_interpreter', 'container': {'type': 'auto'}})
+             elif isinstance(tool, ImageGenerationTool):  # pragma: no branch
+                 has_image_generating_tool = True
+                 tools.append(
+                     responses.tool_param.ImageGeneration(
+                         type='image_generation',
+                         background=tool.background,
+                         input_fidelity=tool.input_fidelity,
+                         moderation=tool.moderation,
+                         output_compression=tool.output_compression,
+                         output_format=tool.output_format or 'png',
+                         partial_images=tool.partial_images,
+                         quality=tool.quality,
+                         size=tool.size,
+                     )
+                 )
              else:
                  raise UserError(  # pragma: no cover
                      f'`{tool.__class__.__name__}` is not supported by `OpenAIResponsesModel`. If it should be, please file an issue.'
                  )
+
+         if model_request_parameters.allow_image_output and not has_image_generating_tool:
+             tools.append({'type': 'image_generation'})
          return tools

      def _map_tool_definition(self, f: ToolDefinition) -> responses.FunctionToolParam:
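
Note: with the plumbing above, the Responses API's `image_generation` built-in tool can be enabled from the agent level. A minimal sketch, with an illustrative model name and illustrative option values (the available fields are the ones listed in the hunk above):

    from pydantic_ai import Agent
    from pydantic_ai.builtin_tools import ImageGenerationTool

    agent = Agent(
        'openai-responses:gpt-4.1',  # illustrative model name
        builtin_tools=[ImageGenerationTool(output_format='png', quality='high')],
    )
    # result = agent.run_sync('Draw a sailboat at sunset')
    # Generated images arrive as FilePart items on the model response.
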
@@ -1282,8 +1310,7 @@ class OpenAIResponsesModel(Model):
                  )
                  openai_messages.append(item)
              elif isinstance(part, RetryPromptPart):
-                 # TODO(Marcelo): How do we test this conditional branch?
-                 if part.tool_name is None:  # pragma: no cover
+                 if part.tool_name is None:
                      openai_messages.append(
                          Message(role='user', content=[{'type': 'input_text', 'text': part.model_response()}])
                      )
@@ -1342,7 +1369,7 @@ class OpenAIResponsesModel(Model):
                      param['id'] = id
                  openai_messages.append(param)
              elif isinstance(item, BuiltinToolCallPart):
-                 if item.provider_name == self.system:
+                 if item.provider_name == self.system and send_item_ids:
                      if (
                          item.tool_name == CodeExecutionTool.kind
                          and item.tool_call_id
@@ -1353,7 +1380,7 @@ class OpenAIResponsesModel(Model):
                              id=item.tool_call_id,
                              code=args.get('code'),
                              container_id=container_id,
-                             outputs=None,
+                             outputs=None,  # These can be read server-side
                              status='completed',
                              type='code_interpreter_call',
                          )
@@ -1362,7 +1389,7 @@ class OpenAIResponsesModel(Model):
                          item.tool_name == WebSearchTool.kind
                          and item.tool_call_id
                          and (args := item.args_as_dict())
-                     ):  # pragma: no branch
+                     ):
                          web_search_item = responses.ResponseFunctionWebSearchParam(
                              id=item.tool_call_id,
                              action=cast(responses.response_function_web_search_param.Action, args),
@@ -1370,8 +1397,18 @@ class OpenAIResponsesModel(Model):
                              type='web_search_call',
                          )
                          openai_messages.append(web_search_item)
+                     elif item.tool_name == ImageGenerationTool.kind and item.tool_call_id:  # pragma: no branch
+                         # The cast is necessary because of https://github.com/openai/openai-python/issues/2648
+                         image_generation_item = cast(
+                             responses.response_input_item_param.ImageGenerationCall,
+                             {
+                                 'id': item.tool_call_id,
+                                 'type': 'image_generation_call',
+                             },
+                         )
+                         openai_messages.append(image_generation_item)
              elif isinstance(item, BuiltinToolReturnPart):
-                 if item.provider_name == self.system:
+                 if item.provider_name == self.system and send_item_ids:
                      if (
                          item.tool_name == CodeExecutionTool.kind
                          and code_interpreter_item is not None
@@ -1379,7 +1416,6 @@ class OpenAIResponsesModel(Model):
                          and (content := cast(dict[str, Any], item.content))  # pyright: ignore[reportUnknownMemberType]
                          and (status := content.get('status'))
                      ):
-                         code_interpreter_item['outputs'] = content.get('outputs')
                          code_interpreter_item['status'] = status
                      elif (
                          item.tool_name == WebSearchTool.kind
@@ -1387,8 +1423,16 @@ class OpenAIResponsesModel(Model):
                          and isinstance(item.content, dict)  # pyright: ignore[reportUnknownMemberType]
                          and (content := cast(dict[str, Any], item.content))  # pyright: ignore[reportUnknownMemberType]
                          and (status := content.get('status'))
-                     ):  # pragma: no branch
+                     ):
                          web_search_item['status'] = status
+                     elif item.tool_name == ImageGenerationTool.kind:  # pragma: no branch
+                         # Image generation result does not need to be sent back, just the `id` off of `BuiltinToolCallPart`.
+                         pass
+             elif isinstance(item, FilePart):
+                 # This was generated by the `ImageGenerationTool` or `CodeExecutionTool`,
+                 # and does not need to be sent back separately from the corresponding `BuiltinToolReturnPart`.
+                 # If `send_item_ids` is false, we won't send the `BuiltinToolReturnPart`, but OpenAI does not have a type for files from the assistant.
+                 pass
              elif isinstance(item, ThinkingPart):
                  if item.id and send_item_ids:
                      signature: str | None = None
@@ -1454,7 +1498,6 @@ class OpenAIResponsesModel(Model):
              if isinstance(item, str):
                  content.append(responses.ResponseInputTextParam(text=item, type='input_text'))
              elif isinstance(item, BinaryContent):
-                 base64_encoded = base64.b64encode(item.data).decode('utf-8')
                  if item.is_image:
                      detail: Literal['auto', 'low', 'high'] = 'auto'
                      if metadata := item.vendor_metadata:
@@ -1464,7 +1507,7 @@ class OpenAIResponsesModel(Model):
                      )
                      content.append(
                          responses.ResponseInputImageParam(
-                             image_url=f'data:{item.media_type};base64,{base64_encoded}',
+                             image_url=item.data_uri,
                              type='input_image',
                              detail=detail,
                          )
@@ -1473,7 +1516,7 @@ class OpenAIResponsesModel(Model):
                      content.append(
                          responses.ResponseInputFileParam(
                              type='input_file',
-                             file_data=f'data:{item.media_type};base64,{base64_encoded}',
+                             file_data=item.data_uri,
                              # NOTE: Type wise it's not necessary to include the filename, but it's required by the
                              # API itself. If we add empty string, the server sends a 500 error - which OpenAI needs
                              # to fix. In any case, we add a placeholder name.
@@ -1681,18 +1724,18 @@ class OpenAIResponsesStreamedResponse(StreamedResponse):
                      pass
                  elif isinstance(chunk.item, responses.ResponseFunctionWebSearch):
                      call_part, _ = _map_web_search_tool_call(chunk.item, self.provider_name)
-                     yield self._parts_manager.handle_builtin_tool_call_part(
+                     yield self._parts_manager.handle_part(
                          vendor_part_id=f'{chunk.item.id}-call', part=replace(call_part, args=None)
                      )
                  elif isinstance(chunk.item, responses.ResponseCodeInterpreterToolCall):
-                     call_part, _ = _map_code_interpreter_tool_call(chunk.item, self.provider_name)
+                     call_part, _, _ = _map_code_interpreter_tool_call(chunk.item, self.provider_name)

                      args_json = call_part.args_as_json_str()
                      # Drop the final `"}` so that we can add code deltas
                      args_json_delta = args_json[:-2]
                      assert args_json_delta.endswith('code":"')

-                     yield self._parts_manager.handle_builtin_tool_call_part(
+                     yield self._parts_manager.handle_part(
                          vendor_part_id=f'{chunk.item.id}-call', part=replace(call_part, args=None)
                      )
                      maybe_event = self._parts_manager.handle_tool_call_delta(
@@ -1701,6 +1744,10 @@ class OpenAIResponsesStreamedResponse(StreamedResponse):
                      )
                      if maybe_event is not None:  # pragma: no branch
                          yield maybe_event
+                 elif isinstance(chunk.item, responses.response_output_item.ImageGenerationCall):
+                     call_part, _, _ = _map_image_generation_tool_call(chunk.item, self.provider_name)
+                     yield self._parts_manager.handle_part(vendor_part_id=f'{chunk.item.id}-call', part=call_part)
+
                  else:
                      warnings.warn(  # pragma: no cover
                          f'Handling of this item type is not yet implemented. Please report on our GitHub: {chunk}',
@@ -1718,10 +1765,12 @@ class OpenAIResponsesStreamedResponse(StreamedResponse):
                          provider_name=self.provider_name,
                      )
                  elif isinstance(chunk.item, responses.ResponseCodeInterpreterToolCall):
-                     _, return_part = _map_code_interpreter_tool_call(chunk.item, self.provider_name)
-                     yield self._parts_manager.handle_builtin_tool_return_part(
-                         vendor_part_id=f'{chunk.item.id}-return', part=return_part
-                     )
+                     _, return_part, file_parts = _map_code_interpreter_tool_call(chunk.item, self.provider_name)
+                     for i, file_part in enumerate(file_parts):
+                         yield self._parts_manager.handle_part(
+                             vendor_part_id=f'{chunk.item.id}-file-{i}', part=file_part
+                         )
+                     yield self._parts_manager.handle_part(vendor_part_id=f'{chunk.item.id}-return', part=return_part)
                  elif isinstance(chunk.item, responses.ResponseFunctionWebSearch):
                      call_part, return_part = _map_web_search_tool_call(chunk.item, self.provider_name)

@@ -1732,9 +1781,12 @@ class OpenAIResponsesStreamedResponse(StreamedResponse):
                      if maybe_event is not None:  # pragma: no branch
                          yield maybe_event

-                     yield self._parts_manager.handle_builtin_tool_return_part(
-                         vendor_part_id=f'{chunk.item.id}-return', part=return_part
-                     )
+                     yield self._parts_manager.handle_part(vendor_part_id=f'{chunk.item.id}-return', part=return_part)
+                 elif isinstance(chunk.item, responses.response_output_item.ImageGenerationCall):
+                     _, return_part, file_part = _map_image_generation_tool_call(chunk.item, self.provider_name)
+                     if file_part:  # pragma: no branch
+                         yield self._parts_manager.handle_part(vendor_part_id=f'{chunk.item.id}-file', part=file_part)
+                     yield self._parts_manager.handle_part(vendor_part_id=f'{chunk.item.id}-return', part=return_part)

              elif isinstance(chunk, responses.ResponseReasoningSummaryPartAddedEvent):
                  yield self._parts_manager.handle_thinking_delta(
@@ -1756,8 +1808,8 @@ class OpenAIResponsesStreamedResponse(StreamedResponse):
                      id=chunk.item_id,
                  )

-             # TODO(Marcelo): We should support annotations in the future.
-             elif isinstance(chunk, responses.ResponseOutputTextAnnotationAddedEvent):  # pragma: no cover
+             elif isinstance(chunk, responses.ResponseOutputTextAnnotationAddedEvent):
+                 # TODO(Marcelo): We should support annotations in the future.
                  pass  # there's nothing we need to do here

              elif isinstance(chunk, responses.ResponseTextDeltaEvent):
@@ -1808,6 +1860,28 @@ class OpenAIResponsesStreamedResponse(StreamedResponse):
              elif isinstance(chunk, responses.ResponseCodeInterpreterCallInterpretingEvent):
                  pass  # there's nothing we need to do here

+             elif isinstance(chunk, responses.ResponseImageGenCallCompletedEvent):  # pragma: no cover
+                 pass  # there's nothing we need to do here
+
+             elif isinstance(chunk, responses.ResponseImageGenCallGeneratingEvent):
+                 pass  # there's nothing we need to do here
+
+             elif isinstance(chunk, responses.ResponseImageGenCallInProgressEvent):
+                 pass  # there's nothing we need to do here
+
+             elif isinstance(chunk, responses.ResponseImageGenCallPartialImageEvent):
+                 # Not present on the type, but present on the actual object.
+                 # See https://github.com/openai/openai-python/issues/2649
+                 output_format = getattr(chunk, 'output_format', 'png')
+                 file_part = FilePart(
+                     content=BinaryImage(
+                         data=base64.b64decode(chunk.partial_image_b64),
+                         media_type=f'image/{output_format}',
+                     ),
+                     id=chunk.item_id,
+                 )
+                 yield self._parts_manager.handle_part(vendor_part_id=f'{chunk.item_id}-file', part=file_part)
+
              else:  # pragma: no cover
                  warnings.warn(
                      f'Handling of this event type is not yet implemented. Please report on our GitHub: {chunk}',
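
Note: each `ResponseImageGenCallPartialImageEvent` carries a complete base64-encoded preview image rather than a delta, which is why the branch above decodes the whole payload into a fresh `BinaryImage` every time. A consumer-side sketch of the same decode step (the file name is hypothetical):

    import base64
    from pathlib import Path

    def save_partial_image(partial_image_b64: str, output_format: str = 'png') -> Path:
        """Decode one partial-image payload and write it to disk as a viewable file."""
        path = Path(f'partial.{output_format}')  # hypothetical output location
        path.write_bytes(base64.b64decode(partial_image_b64))  # each event is a full image, not a diff
        return path
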
@@ -1897,12 +1971,29 @@ def _split_combined_tool_call_id(combined_id: str) -> tuple[str, str | None]:

  def _map_code_interpreter_tool_call(
      item: responses.ResponseCodeInterpreterToolCall, provider_name: str
- ) -> tuple[BuiltinToolCallPart, BuiltinToolReturnPart]:
+ ) -> tuple[BuiltinToolCallPart, BuiltinToolReturnPart, list[FilePart]]:
      result: dict[str, Any] = {
          'status': item.status,
      }
+
+     file_parts: list[FilePart] = []
+     logs: list[str] = []
      if item.outputs:
-         result['outputs'] = [output.model_dump(mode='json') for output in item.outputs]
+         for output in item.outputs:
+             if isinstance(output, responses.response_code_interpreter_tool_call.OutputImage):
+                 file_parts.append(
+                     FilePart(
+                         content=BinaryImage.from_data_uri(output.url),
+                         id=item.id,
+                     )
+                 )
+             elif isinstance(output, responses.response_code_interpreter_tool_call.OutputLogs):
+                 logs.append(output.logs)
+             else:
+                 assert_never(output)
+
+     if logs:
+         result['logs'] = logs

      return (
          BuiltinToolCallPart(
@@ -1920,6 +2011,7 @@ def _map_code_interpreter_tool_call(
              content=result,
              provider_name=provider_name,
          ),
+         file_parts,
      )
@@ -1953,3 +2045,51 @@ def _map_web_search_tool_call(
              provider_name=provider_name,
          ),
      )
+
+
+ def _map_image_generation_tool_call(
+     item: responses.response_output_item.ImageGenerationCall, provider_name: str
+ ) -> tuple[BuiltinToolCallPart, BuiltinToolReturnPart, FilePart | None]:
+     result = {
+         'status': item.status,
+     }
+
+     # Not present on the type, but present on the actual object.
+     # See https://github.com/openai/openai-python/issues/2649
+     if background := getattr(item, 'background', None):
+         result['background'] = background
+     if quality := getattr(item, 'quality', None):
+         result['quality'] = quality
+     if size := getattr(item, 'size', None):
+         result['size'] = size
+     if revised_prompt := getattr(item, 'revised_prompt', None):
+         result['revised_prompt'] = revised_prompt
+     output_format = getattr(item, 'output_format', 'png')
+
+     file_part: FilePart | None = None
+     if item.result:
+         file_part = FilePart(
+             content=BinaryImage(
+                 data=base64.b64decode(item.result),
+                 media_type=f'image/{output_format}',
+             ),
+             id=item.id,
+         )
+
+         # For some reason, the streaming API leaves `status` as `generating` even though generation has completed.
+         result['status'] = 'completed'
+
+     return (
+         BuiltinToolCallPart(
+             tool_name=ImageGenerationTool.kind,
+             tool_call_id=item.id,
+             provider_name=provider_name,
+         ),
+         BuiltinToolReturnPart(
+             tool_name=ImageGenerationTool.kind,
+             tool_call_id=item.id,
+             content=result,
+             provider_name=provider_name,
+         ),
+         file_part,
+     )
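
Note: `_map_code_interpreter_tool_call` above relies on `BinaryImage.from_data_uri(output.url)` to turn a code-interpreter image output back into raw bytes. A minimal sketch of that kind of parsing, assuming a well-formed base64 data URI (this is not the library's implementation):

    import base64

    def parse_data_uri(uri: str) -> tuple[bytes, str]:
        """Split 'data:<media_type>;base64,<payload>' into (raw bytes, media type)."""
        header, _, payload = uri.partition(',')
        media_type = header.removeprefix('data:').removesuffix(';base64')
        return base64.b64decode(payload), media_type

    data, media_type = parse_data_uri('data:image/png;base64,aGVsbG8=')
    assert data == b'hello' and media_type == 'image/png'
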
pydantic_ai/models/test.py CHANGED
@@ -17,6 +17,7 @@ from ..exceptions import UserError
  from ..messages import (
      BuiltinToolCallPart,
      BuiltinToolReturnPart,
+     FilePart,
      ModelMessage,
      ModelRequest,
      ModelResponse,
@@ -327,6 +328,9 @@ class TestStreamedResponse(StreamedResponse):
          elif isinstance(part, ThinkingPart):  # pragma: no cover
              # NOTE: There's no way to reach this part of the code, since we don't generate ThinkingPart on TestModel.
              assert False, "This should be unreachable — we don't generate ThinkingPart on TestModel."
+         elif isinstance(part, FilePart):  # pragma: no cover
+             # NOTE: There's no way to reach this part of the code, since we don't generate FilePart on TestModel.
+             assert False, "This should be unreachable — we don't generate FilePart on TestModel."
          else:
              assert_never(part)

pydantic_ai/output.py CHANGED
@@ -37,8 +37,11 @@ T_co = TypeVar('T_co', covariant=True)
  OutputDataT = TypeVar('OutputDataT', default=str, covariant=True)
  """Covariant type variable for the output data type of a run."""

- OutputMode = Literal['text', 'tool', 'native', 'prompted', 'tool_or_text']
- """All output modes."""
+ OutputMode = Literal['text', 'tool', 'native', 'prompted', 'tool_or_text', 'image']
+ """All output modes.
+
+ `tool_or_text` is deprecated and no longer in use.
+ """
  StructuredOutputMode = Literal['tool', 'native', 'prompted']
  """Output modes that can be used for structured output. Used by ModelProfile.default_structured_output_mode"""

pydantic_ai/profiles/__init__.py CHANGED
@@ -28,6 +28,8 @@ class ModelProfile:
      """Whether the model supports JSON schema output."""
      supports_json_object_output: bool = False
      """Whether the model supports JSON object output."""
+     supports_image_output: bool = False
+     """Whether the model supports image output."""
      default_structured_output_mode: StructuredOutputMode = 'tool'
      """The default structured output mode to use for the model."""
      prompted_output_template: str = dedent(
pydantic_ai/profiles/google.py CHANGED
@@ -10,10 +10,13 @@ from . import ModelProfile

  def google_model_profile(model_name: str) -> ModelProfile | None:
      """Get the model profile for a Google model."""
+     is_image_model = 'image' in model_name
      return ModelProfile(
          json_schema_transformer=GoogleJsonSchemaTransformer,
-         supports_json_schema_output=True,
-         supports_json_object_output=True,
+         supports_image_output=is_image_model,
+         supports_json_schema_output=not is_image_model,
+         supports_json_object_output=not is_image_model,
+         supports_tools=not is_image_model,
      )

pydantic_ai/profiles/openai.py CHANGED
@@ -26,7 +26,7 @@ class OpenAIModelProfile(ModelProfile):
      """Turn off to don't send sampling settings like `temperature` and `top_p` to models that don't support them, like OpenAI's o-series reasoning models."""

      openai_unsupported_model_settings: Sequence[str] = ()
-     """A list of model settings that are not supported by the model."""
+     """A list of model settings that are not supported by this model."""

      # Some OpenAI-compatible providers (e.g. MoonshotAI) currently do **not** accept
      # `tool_choice="required"`. This flag lets the calling model know whether it's
@@ -84,6 +84,7 @@ def openai_model_profile(model_name: str) -> ModelProfile:
          json_schema_transformer=OpenAIJsonSchemaTransformer,
          supports_json_schema_output=True,
          supports_json_object_output=True,
+         supports_image_output=is_reasoning_model or '4.1' in model_name or '4o' in model_name,
          openai_unsupported_model_settings=openai_unsupported_model_settings,
          openai_system_prompt_role=openai_system_prompt_role,
          openai_chat_supports_web_search=supports_web_search,
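
Note: the new `supports_image_output` capability flag can be read straight off a model profile. A quick sketch (flag values follow the substring heuristics shown in the hunks above; the Google model name is illustrative):

    from pydantic_ai.profiles.google import google_model_profile
    from pydantic_ai.profiles.openai import openai_model_profile

    print(openai_model_profile('gpt-4o').supports_image_output)  # True: '4o' is in the name

    profile = google_model_profile('gemini-2.5-flash-image')
    print(profile.supports_image_output)  # True: 'image' is in the name
    print(profile.supports_tools)  # False: Google image models trade tool support for image output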