pydantic-ai-slim 1.10.0__py3-none-any.whl → 1.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. pydantic_ai/_agent_graph.py +18 -14
  2. pydantic_ai/_output.py +20 -105
  3. pydantic_ai/_run_context.py +2 -0
  4. pydantic_ai/_tool_manager.py +30 -11
  5. pydantic_ai/agent/__init__.py +34 -32
  6. pydantic_ai/agent/abstract.py +26 -0
  7. pydantic_ai/agent/wrapper.py +5 -0
  8. pydantic_ai/common_tools/duckduckgo.py +1 -1
  9. pydantic_ai/durable_exec/dbos/_agent.py +28 -0
  10. pydantic_ai/durable_exec/prefect/_agent.py +25 -0
  11. pydantic_ai/durable_exec/temporal/_agent.py +25 -0
  12. pydantic_ai/durable_exec/temporal/_run_context.py +2 -1
  13. pydantic_ai/mcp.py +4 -4
  14. pydantic_ai/messages.py +5 -2
  15. pydantic_ai/models/__init__.py +80 -35
  16. pydantic_ai/models/anthropic.py +27 -8
  17. pydantic_ai/models/bedrock.py +3 -3
  18. pydantic_ai/models/cohere.py +5 -3
  19. pydantic_ai/models/fallback.py +25 -4
  20. pydantic_ai/models/function.py +8 -0
  21. pydantic_ai/models/gemini.py +3 -3
  22. pydantic_ai/models/google.py +20 -10
  23. pydantic_ai/models/groq.py +5 -3
  24. pydantic_ai/models/huggingface.py +3 -3
  25. pydantic_ai/models/instrumented.py +29 -13
  26. pydantic_ai/models/mistral.py +6 -4
  27. pydantic_ai/models/openai.py +11 -6
  28. pydantic_ai/models/outlines.py +21 -12
  29. pydantic_ai/models/wrapper.py +1 -1
  30. pydantic_ai/output.py +3 -2
  31. pydantic_ai/profiles/openai.py +5 -2
  32. pydantic_ai/result.py +5 -3
  33. pydantic_ai/tools.py +2 -4
  34. {pydantic_ai_slim-1.10.0.dist-info → pydantic_ai_slim-1.12.0.dist-info}/METADATA +9 -7
  35. {pydantic_ai_slim-1.10.0.dist-info → pydantic_ai_slim-1.12.0.dist-info}/RECORD +38 -38
  36. {pydantic_ai_slim-1.10.0.dist-info → pydantic_ai_slim-1.12.0.dist-info}/WHEEL +0 -0
  37. {pydantic_ai_slim-1.10.0.dist-info → pydantic_ai_slim-1.12.0.dist-info}/entry_points.txt +0 -0
  38. {pydantic_ai_slim-1.10.0.dist-info → pydantic_ai_slim-1.12.0.dist-info}/licenses/LICENSE +0 -0
pydantic_ai/durable_exec/temporal/_agent.py CHANGED
@@ -263,6 +263,7 @@ class TemporalAgent(WrapperAgent[AgentDepsT, OutputDataT]):
         message_history: Sequence[_messages.ModelMessage] | None = None,
         deferred_tool_results: DeferredToolResults | None = None,
         model: models.Model | models.KnownModelName | str | None = None,
+        instructions: Instructions[AgentDepsT] = None,
         deps: AgentDepsT = None,
         model_settings: ModelSettings | None = None,
         usage_limits: _usage.UsageLimits | None = None,
@@ -282,6 +283,7 @@ class TemporalAgent(WrapperAgent[AgentDepsT, OutputDataT]):
         message_history: Sequence[_messages.ModelMessage] | None = None,
         deferred_tool_results: DeferredToolResults | None = None,
         model: models.Model | models.KnownModelName | str | None = None,
+        instructions: Instructions[AgentDepsT] = None,
         deps: AgentDepsT = None,
         model_settings: ModelSettings | None = None,
         usage_limits: _usage.UsageLimits | None = None,
@@ -300,6 +302,7 @@ class TemporalAgent(WrapperAgent[AgentDepsT, OutputDataT]):
         message_history: Sequence[_messages.ModelMessage] | None = None,
         deferred_tool_results: DeferredToolResults | None = None,
         model: models.Model | models.KnownModelName | str | None = None,
+        instructions: Instructions[AgentDepsT] = None,
         deps: AgentDepsT = None,
         model_settings: ModelSettings | None = None,
         usage_limits: _usage.UsageLimits | None = None,
@@ -334,6 +337,7 @@ class TemporalAgent(WrapperAgent[AgentDepsT, OutputDataT]):
             message_history: History of the conversation so far.
             deferred_tool_results: Optional results for deferred tool calls in the message history.
             model: Optional model to use for this run, required if `model` was not set when creating the agent.
+            instructions: Optional additional instructions to use for this run.
             deps: Optional dependencies to use for this run.
             model_settings: Optional settings to use for this model's request.
             usage_limits: Optional limits on model request count or token usage.
@@ -358,6 +362,7 @@ class TemporalAgent(WrapperAgent[AgentDepsT, OutputDataT]):
             message_history=message_history,
             deferred_tool_results=deferred_tool_results,
             model=model,
+            instructions=instructions,
             deps=deps,
             model_settings=model_settings,
             usage_limits=usage_limits,
@@ -378,6 +383,7 @@ class TemporalAgent(WrapperAgent[AgentDepsT, OutputDataT]):
         message_history: Sequence[_messages.ModelMessage] | None = None,
         deferred_tool_results: DeferredToolResults | None = None,
         model: models.Model | models.KnownModelName | str | None = None,
+        instructions: Instructions[AgentDepsT] = None,
         deps: AgentDepsT = None,
         model_settings: ModelSettings | None = None,
         usage_limits: _usage.UsageLimits | None = None,
@@ -397,6 +403,7 @@ class TemporalAgent(WrapperAgent[AgentDepsT, OutputDataT]):
         message_history: Sequence[_messages.ModelMessage] | None = None,
         deferred_tool_results: DeferredToolResults | None = None,
         model: models.Model | models.KnownModelName | str | None = None,
+        instructions: Instructions[AgentDepsT] = None,
         deps: AgentDepsT = None,
         model_settings: ModelSettings | None = None,
         usage_limits: _usage.UsageLimits | None = None,
@@ -415,6 +422,7 @@ class TemporalAgent(WrapperAgent[AgentDepsT, OutputDataT]):
         message_history: Sequence[_messages.ModelMessage] | None = None,
         deferred_tool_results: DeferredToolResults | None = None,
         model: models.Model | models.KnownModelName | str | None = None,
+        instructions: Instructions[AgentDepsT] = None,
         deps: AgentDepsT = None,
         model_settings: ModelSettings | None = None,
         usage_limits: _usage.UsageLimits | None = None,
@@ -448,6 +456,7 @@ class TemporalAgent(WrapperAgent[AgentDepsT, OutputDataT]):
             message_history: History of the conversation so far.
             deferred_tool_results: Optional results for deferred tool calls in the message history.
             model: Optional model to use for this run, required if `model` was not set when creating the agent.
+            instructions: Optional additional instructions to use for this run.
             deps: Optional dependencies to use for this run.
             model_settings: Optional settings to use for this model's request.
             usage_limits: Optional limits on model request count or token usage.
@@ -471,6 +480,7 @@ class TemporalAgent(WrapperAgent[AgentDepsT, OutputDataT]):
             message_history=message_history,
             deferred_tool_results=deferred_tool_results,
             model=model,
+            instructions=instructions,
             deps=deps,
             model_settings=model_settings,
             usage_limits=usage_limits,
@@ -491,6 +501,7 @@ class TemporalAgent(WrapperAgent[AgentDepsT, OutputDataT]):
         message_history: Sequence[_messages.ModelMessage] | None = None,
         deferred_tool_results: DeferredToolResults | None = None,
         model: models.Model | models.KnownModelName | str | None = None,
+        instructions: Instructions[AgentDepsT] = None,
         deps: AgentDepsT = None,
         model_settings: ModelSettings | None = None,
         usage_limits: _usage.UsageLimits | None = None,
@@ -510,6 +521,7 @@ class TemporalAgent(WrapperAgent[AgentDepsT, OutputDataT]):
         message_history: Sequence[_messages.ModelMessage] | None = None,
         deferred_tool_results: DeferredToolResults | None = None,
         model: models.Model | models.KnownModelName | str | None = None,
+        instructions: Instructions[AgentDepsT] = None,
         deps: AgentDepsT = None,
         model_settings: ModelSettings | None = None,
         usage_limits: _usage.UsageLimits | None = None,
@@ -529,6 +541,7 @@ class TemporalAgent(WrapperAgent[AgentDepsT, OutputDataT]):
         message_history: Sequence[_messages.ModelMessage] | None = None,
         deferred_tool_results: DeferredToolResults | None = None,
         model: models.Model | models.KnownModelName | str | None = None,
+        instructions: Instructions[AgentDepsT] = None,
         deps: AgentDepsT = None,
         model_settings: ModelSettings | None = None,
         usage_limits: _usage.UsageLimits | None = None,
@@ -560,6 +573,7 @@ class TemporalAgent(WrapperAgent[AgentDepsT, OutputDataT]):
             message_history: History of the conversation so far.
             deferred_tool_results: Optional results for deferred tool calls in the message history.
             model: Optional model to use for this run, required if `model` was not set when creating the agent.
+            instructions: Optional additional instructions to use for this run.
             deps: Optional dependencies to use for this run.
             model_settings: Optional settings to use for this model's request.
             usage_limits: Optional limits on model request count or token usage.
@@ -584,6 +598,7 @@ class TemporalAgent(WrapperAgent[AgentDepsT, OutputDataT]):
             message_history=message_history,
             deferred_tool_results=deferred_tool_results,
             model=model,
+            instructions=instructions,
             deps=deps,
             model_settings=model_settings,
             usage_limits=usage_limits,
@@ -605,6 +620,7 @@ class TemporalAgent(WrapperAgent[AgentDepsT, OutputDataT]):
         message_history: Sequence[_messages.ModelMessage] | None = None,
         deferred_tool_results: DeferredToolResults | None = None,
         model: models.Model | models.KnownModelName | str | None = None,
+        instructions: Instructions[AgentDepsT] = None,
         deps: AgentDepsT = None,
         model_settings: ModelSettings | None = None,
         usage_limits: _usage.UsageLimits | None = None,
@@ -623,6 +639,7 @@ class TemporalAgent(WrapperAgent[AgentDepsT, OutputDataT]):
         message_history: Sequence[_messages.ModelMessage] | None = None,
         deferred_tool_results: DeferredToolResults | None = None,
         model: models.Model | models.KnownModelName | str | None = None,
+        instructions: Instructions[AgentDepsT] = None,
         deps: AgentDepsT = None,
         model_settings: ModelSettings | None = None,
         usage_limits: _usage.UsageLimits | None = None,
@@ -640,6 +657,7 @@ class TemporalAgent(WrapperAgent[AgentDepsT, OutputDataT]):
         message_history: Sequence[_messages.ModelMessage] | None = None,
         deferred_tool_results: DeferredToolResults | None = None,
         model: models.Model | models.KnownModelName | str | None = None,
+        instructions: Instructions[AgentDepsT] = None,
         deps: AgentDepsT = None,
         model_settings: ModelSettings | None = None,
         usage_limits: _usage.UsageLimits | None = None,
@@ -689,6 +707,7 @@ class TemporalAgent(WrapperAgent[AgentDepsT, OutputDataT]):
             message_history: History of the conversation so far.
             deferred_tool_results: Optional results for deferred tool calls in the message history.
             model: Optional model to use for this run, required if `model` was not set when creating the agent.
+            instructions: Optional additional instructions to use for this run.
             deps: Optional dependencies to use for this run.
             model_settings: Optional settings to use for this model's request.
             usage_limits: Optional limits on model request count or token usage.
@@ -713,6 +732,7 @@ class TemporalAgent(WrapperAgent[AgentDepsT, OutputDataT]):
             message_history=message_history,
             deferred_tool_results=deferred_tool_results,
             model=model,
+            instructions=instructions,
             deps=deps,
             model_settings=model_settings,
             usage_limits=usage_limits,
@@ -731,6 +751,7 @@ class TemporalAgent(WrapperAgent[AgentDepsT, OutputDataT]):
         message_history: Sequence[_messages.ModelMessage] | None = None,
         deferred_tool_results: DeferredToolResults | None = None,
         model: models.Model | models.KnownModelName | str | None = None,
+        instructions: Instructions[AgentDepsT] = None,
         deps: AgentDepsT = None,
         model_settings: ModelSettings | None = None,
         usage_limits: _usage.UsageLimits | None = None,
@@ -750,6 +771,7 @@ class TemporalAgent(WrapperAgent[AgentDepsT, OutputDataT]):
         message_history: Sequence[_messages.ModelMessage] | None = None,
         deferred_tool_results: DeferredToolResults | None = None,
         model: models.Model | models.KnownModelName | str | None = None,
+        instructions: Instructions[AgentDepsT] = None,
         deps: AgentDepsT = None,
         model_settings: ModelSettings | None = None,
         usage_limits: _usage.UsageLimits | None = None,
@@ -769,6 +791,7 @@ class TemporalAgent(WrapperAgent[AgentDepsT, OutputDataT]):
         message_history: Sequence[_messages.ModelMessage] | None = None,
         deferred_tool_results: DeferredToolResults | None = None,
         model: models.Model | models.KnownModelName | str | None = None,
+        instructions: Instructions[AgentDepsT] = None,
         deps: AgentDepsT = None,
         model_settings: ModelSettings | None = None,
         usage_limits: _usage.UsageLimits | None = None,
@@ -843,6 +866,7 @@ class TemporalAgent(WrapperAgent[AgentDepsT, OutputDataT]):
             message_history: History of the conversation so far.
             deferred_tool_results: Optional results for deferred tool calls in the message history.
             model: Optional model to use for this run, required if `model` was not set when creating the agent.
+            instructions: Optional additional instructions to use for this run.
             deps: Optional dependencies to use for this run.
             model_settings: Optional settings to use for this model's request.
             usage_limits: Optional limits on model request count or token usage.
@@ -876,6 +900,7 @@ class TemporalAgent(WrapperAgent[AgentDepsT, OutputDataT]):
             message_history=message_history,
             deferred_tool_results=deferred_tool_results,
             model=model,
+            instructions=instructions,
             deps=deps,
             model_settings=model_settings,
             usage_limits=usage_limits,
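
Every run entry point above gains the same optional per-run `instructions` parameter, forwarded through to the wrapped agent. A minimal usage sketch (model name and prompts are illustrative; `run_sync` stands in for the other overloads):

```python
from pydantic_ai import Agent

agent = Agent('anthropic:claude-sonnet-4-0', instructions='Be concise.')

# Per-run instructions are applied in addition to the agent-level ones,
# for this run only (per the docstring added above).
result = agent.run_sync(
    'Summarize the plot of Hamlet.',
    instructions='Answer in exactly one sentence.',
)
print(result.output)
```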
pydantic_ai/durable_exec/temporal/_run_context.py CHANGED
@@ -14,7 +14,7 @@ AgentDepsT = TypeVar('AgentDepsT', default=None, covariant=True)
 class TemporalRunContext(RunContext[AgentDepsT]):
     """The [`RunContext`][pydantic_ai.tools.RunContext] subclass to use to serialize and deserialize the run context for use inside a Temporal activity.
 
-    By default, only the `deps`, `retries`, `tool_call_id`, `tool_name`, `tool_call_approved`, `retry`, `max_retries` and `run_step` attributes will be available.
+    By default, only the `deps`, `retries`, `tool_call_id`, `tool_name`, `tool_call_approved`, `retry`, `max_retries`, `run_step` and `partial_output` attributes will be available.
     To make another attribute available, create a `TemporalRunContext` subclass with a custom `serialize_run_context` class method that returns a dictionary that includes the attribute and pass it to [`TemporalAgent`][pydantic_ai.durable_exec.temporal.TemporalAgent].
     """
 
@@ -49,6 +49,7 @@ class TemporalRunContext(RunContext[AgentDepsT]):
             'retry': ctx.retry,
             'max_retries': ctx.max_retries,
             'run_step': ctx.run_step,
+            'partial_output': ctx.partial_output,
         }
 
     @classmethod
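
As the docstring above notes, only a fixed set of attributes (now including `partial_output`) survives serialization into a Temporal activity; anything else needs a subclass. A sketch, assuming `TemporalAgent`'s `run_context_type` parameter and using `ctx.prompt` as the illustrative extra attribute:

```python
from typing import Any

from pydantic_ai import RunContext
from pydantic_ai.durable_exec.temporal import TemporalRunContext


class ExtendedRunContext(TemporalRunContext):
    @classmethod
    def serialize_run_context(cls, ctx: RunContext[Any]) -> dict[str, Any]:
        # Keep the default attributes and ship one more into the activity.
        return {**super().serialize_run_context(ctx), 'prompt': ctx.prompt}


# Assumed wiring: TemporalAgent(agent, run_context_type=ExtendedRunContext)
```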
pydantic_ai/mcp.py CHANGED
@@ -726,9 +726,9 @@ class _MCPServerHTTP(MCPServer):
             MemoryObjectReceiveStream[SessionMessage | Exception],
             MemoryObjectSendStream[SessionMessage],
         ]
-    ]:  # pragma: no cover
+    ]:
         if self.http_client and self.headers:
-            raise ValueError('`http_client` is mutually exclusive with `headers`.')
+            raise ValueError('`http_client` is mutually exclusive with `headers`.')  # pragma: no cover
 
         transport_client_partial = functools.partial(
             self._transport_client,
@@ -737,7 +737,7 @@ class _MCPServerHTTP(MCPServer):
             sse_read_timeout=self.read_timeout,
         )
 
-        if self.http_client is not None:
+        if self.http_client is not None:  # pragma: no cover
 
             def httpx_client_factory(
                 headers: dict[str, str] | None = None,
@@ -866,7 +866,7 @@ class MCPServerStreamableHTTP(_MCPServerHTTP):
 
     @property
     def _transport_client(self):
-        return streamablehttp_client  # pragma: no cover
+        return streamablehttp_client
 
     def __eq__(self, value: object, /) -> bool:
         return super().__eq__(value) and isinstance(value, MCPServerStreamableHTTP) and self.url == value.url
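
The relocated pragmas follow the validation above: a custom `http_client` is mutually exclusive with `headers`, since headers belong on the client itself. A sketch with an illustrative URL and token:

```python
import httpx

from pydantic_ai.mcp import MCPServerStreamableHTTP

# Put auth headers on the client rather than on the server object.
client = httpx.AsyncClient(headers={'Authorization': 'Bearer <token>'})
server = MCPServerStreamableHTTP(url='http://localhost:8000/mcp', http_client=client)

# Passing headers= as well would trip the ValueError guard above.
```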
pydantic_ai/messages.py CHANGED
@@ -485,7 +485,7 @@ class BinaryContent:
     """
 
     _identifier: Annotated[str | None, pydantic.Field(alias='identifier', default=None, exclude=True)] = field(
-        compare=False, default=None, repr=False
+        compare=False, default=None
     )
 
     kind: Literal['binary'] = 'binary'
@@ -888,7 +888,10 @@ class RetryPromptPart:
             description = self.content
         else:
             json_errors = error_details_ta.dump_json(self.content, exclude={'__all__': {'ctx'}}, indent=2)
-            description = f'{len(self.content)} validation errors: {json_errors.decode()}'
+            plural = isinstance(self.content, list) and len(self.content) != 1
+            description = (
+                f'{len(self.content)} validation error{"s" if plural else ""}:\n```json\n{json_errors.decode()}\n```'
+            )
         return f'{description}\n\nFix the errors and try again.'
 
     def otel_event(self, settings: InstrumentationSettings) -> Event:
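
The `RetryPromptPart` change fixes the singular/plural mismatch ("1 validation errors") and wraps the error details in a fenced JSON block. The pluralization logic in isolation (the f-string fragment is copied from the diff):

```python
errors = [{'type': 'missing', 'loc': ('name',), 'msg': 'Field required'}]

plural = isinstance(errors, list) and len(errors) != 1
print(f'{len(errors)} validation error{"s" if plural else ""}:')
# one error  -> "1 validation error:" (previously "1 validation errors:")
# two errors -> "2 validation errors:"
```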
pydantic_ai/models/__init__.py CHANGED
@@ -21,7 +21,7 @@ from typing_extensions import TypeAliasType, TypedDict
 
 from .. import _utils
 from .._json_schema import JsonSchemaTransformer
-from .._output import OutputObjectDefinition
+from .._output import OutputObjectDefinition, PromptedOutputSchema
 from .._parts_manager import ModelResponsePartsManager
 from .._run_context import RunContext
 from ..builtin_tools import AbstractBuiltinTool
@@ -309,6 +309,7 @@ class ModelRequestParameters:
     output_mode: OutputMode = 'text'
     output_object: OutputObjectDefinition | None = None
     output_tools: list[ToolDefinition] = field(default_factory=list)
+    prompted_output_template: str | None = None
     allow_text_output: bool = True
     allow_image_output: bool = False
 
@@ -316,6 +317,12 @@ class ModelRequestParameters:
     def tool_defs(self) -> dict[str, ToolDefinition]:
         return {tool_def.name: tool_def for tool_def in [*self.function_tools, *self.output_tools]}
 
+    @cached_property
+    def prompted_output_instructions(self) -> str | None:
+        if self.output_mode == 'prompted' and self.prompted_output_template and self.output_object:
+            return PromptedOutputSchema.build_instructions(self.prompted_output_template, self.output_object)
+        return None
+
     __repr__ = _utils.dataclasses_no_defaults_repr
 
 
@@ -408,23 +415,52 @@ class Model(ABC):
     ) -> tuple[ModelSettings | None, ModelRequestParameters]:
         """Prepare request inputs before they are passed to the provider.
 
-        This merges the given ``model_settings`` with the model's own ``settings`` attribute and ensures
-        ``customize_request_parameters`` is applied to the resolved
+        This merges the given `model_settings` with the model's own `settings` attribute and ensures
+        `customize_request_parameters` is applied to the resolved
         [`ModelRequestParameters`][pydantic_ai.models.ModelRequestParameters]. Subclasses can override this method if
         they need to customize the preparation flow further, but most implementations should simply call
-        ``self.prepare_request(...)`` at the start of their ``request`` (and related) methods.
+        `self.prepare_request(...)` at the start of their `request` (and related) methods.
         """
         model_settings = merge_model_settings(self.settings, model_settings)
 
-        if builtin_tools := model_request_parameters.builtin_tools:
+        params = self.customize_request_parameters(model_request_parameters)
+
+        if builtin_tools := params.builtin_tools:
             # Deduplicate builtin tools
-            model_request_parameters = replace(
-                model_request_parameters,
+            params = replace(
+                params,
                 builtin_tools=list({tool.unique_id: tool for tool in builtin_tools}.values()),
             )
 
-        model_request_parameters = self.customize_request_parameters(model_request_parameters)
-        return model_settings, model_request_parameters
+        if params.output_mode == 'auto':
+            output_mode = self.profile.default_structured_output_mode
+            params = replace(
+                params,
+                output_mode=output_mode,
+                allow_text_output=output_mode in ('native', 'prompted'),
+            )
+
+        # Reset irrelevant fields
+        if params.output_tools and params.output_mode != 'tool':
+            params = replace(params, output_tools=[])
+        if params.output_object and params.output_mode not in ('native', 'prompted'):
+            params = replace(params, output_object=None)
+        if params.prompted_output_template and params.output_mode != 'prompted':
+            params = replace(params, prompted_output_template=None)  # pragma: no cover
+
+        # Set default prompted output template
+        if params.output_mode == 'prompted' and not params.prompted_output_template:
+            params = replace(params, prompted_output_template=self.profile.prompted_output_template)
+
+        # Check if output mode is supported
+        if params.output_mode == 'native' and not self.profile.supports_json_schema_output:
+            raise UserError('Native structured output is not supported by this model.')
+        if params.output_mode == 'tool' and not self.profile.supports_tools:
+            raise UserError('Tool output is not supported by this model.')
+        if params.allow_image_output and not self.profile.supports_image_output:
+            raise UserError('Image output is not supported by this model.')
+
+        return model_settings, params
 
     @property
     @abstractmethod
@@ -462,13 +498,17 @@ class Model(ABC):
         return None
 
     @staticmethod
-    def _get_instructions(messages: list[ModelMessage]) -> str | None:
+    def _get_instructions(
+        messages: list[ModelMessage], model_request_parameters: ModelRequestParameters | None = None
+    ) -> str | None:
         """Get instructions from the first ModelRequest found when iterating messages in reverse.
 
         In the case that a "mock" request was generated to include a tool-return part for a result tool,
        we want to use the instructions from the second-to-most-recent request (which should correspond to the
        original request that generated the response that resulted in the tool-return part).
        """
+        instructions = None
+
         last_two_requests: list[ModelRequest] = []
         for message in reversed(messages):
             if isinstance(message, ModelRequest):
@@ -476,33 +516,38 @@ class Model(ABC):
                 if len(last_two_requests) == 2:
                     break
                 if message.instructions is not None:
-                    return message.instructions
+                    instructions = message.instructions
+                    break
 
         # If we don't have two requests, and we didn't already return instructions, there are definitely not any:
-        if len(last_two_requests) != 2:
-            return None
-
-        most_recent_request = last_two_requests[0]
-        second_most_recent_request = last_two_requests[1]
-
-        # If we've gotten this far and the most recent request consists of only tool-return parts or retry-prompt parts,
-        # we use the instructions from the second-to-most-recent request. This is necessary because when handling
-        # result tools, we generate a "mock" ModelRequest with a tool-return part for it, and that ModelRequest will not
-        # have the relevant instructions from the agent.
-
-        # While it's possible that you could have a message history where the most recent request has only tool returns,
-        # I believe there is no way to achieve that would _change_ the instructions without manually crafting the most
-        # recent message. That might make sense in principle for some usage pattern, but it's enough of an edge case
-        # that I think it's not worth worrying about, since you can work around this by inserting another ModelRequest
-        # with no parts at all immediately before the request that has the tool calls (that works because we only look
-        # at the two most recent ModelRequests here).
-
-        # If you have a use case where this causes pain, please open a GitHub issue and we can discuss alternatives.
-
-        if all(p.part_kind == 'tool-return' or p.part_kind == 'retry-prompt' for p in most_recent_request.parts):
-            return second_most_recent_request.instructions
-
-        return None
+        if instructions is None and len(last_two_requests) == 2:
+            most_recent_request = last_two_requests[0]
+            second_most_recent_request = last_two_requests[1]
+
+            # If we've gotten this far and the most recent request consists of only tool-return parts or retry-prompt parts,
+            # we use the instructions from the second-to-most-recent request. This is necessary because when handling
+            # result tools, we generate a "mock" ModelRequest with a tool-return part for it, and that ModelRequest will not
+            # have the relevant instructions from the agent.
+
+            # While it's possible that you could have a message history where the most recent request has only tool returns,
+            # I believe there is no way to achieve that would _change_ the instructions without manually crafting the most
+            # recent message. That might make sense in principle for some usage pattern, but it's enough of an edge case
+            # that I think it's not worth worrying about, since you can work around this by inserting another ModelRequest
+            # with no parts at all immediately before the request that has the tool calls (that works because we only look
+            # at the two most recent ModelRequests here).
+
+            # If you have a use case where this causes pain, please open a GitHub issue and we can discuss alternatives.
+
+            if all(p.part_kind == 'tool-return' or p.part_kind == 'retry-prompt' for p in most_recent_request.parts):
+                instructions = second_most_recent_request.instructions
+
+        if model_request_parameters and (output_instructions := model_request_parameters.prompted_output_instructions):
+            if instructions:
+                instructions = '\n\n'.join([instructions, output_instructions])
+            else:
+                instructions = output_instructions
+
+        return instructions
 
 
 @dataclass
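
`prepare_request` now owns output-mode resolution: it applies `customize_request_parameters`, resolves `'auto'` via the model profile, clears fields irrelevant to the resolved mode, fills in the profile's default prompted-output template, and raises `UserError` for unsupported modes. Per the docstring, implementations call it at the start of `request`; a minimal sketch of a custom model doing so (the model itself is illustrative, not part of the diff):

```python
from pydantic_ai.messages import ModelMessage, ModelResponse, TextPart
from pydantic_ai.models import Model, ModelRequestParameters
from pydantic_ai.settings import ModelSettings


class EchoModel(Model):
    @property
    def model_name(self) -> str:
        return 'echo'

    @property
    def system(self) -> str:
        return 'echo'

    async def request(
        self,
        messages: list[ModelMessage],
        model_settings: ModelSettings | None,
        model_request_parameters: ModelRequestParameters,
    ) -> ModelResponse:
        # Merges settings and runs the new output-mode resolution/validation.
        model_settings, model_request_parameters = self.prepare_request(
            model_settings, model_request_parameters
        )
        return ModelResponse(parts=[TextPart(content='hello')])
```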
pydantic_ai/models/anthropic.py CHANGED
@@ -39,7 +39,7 @@ from ..messages import (
 from ..profiles import ModelProfileSpec
 from ..providers import Provider, infer_provider
 from ..providers.anthropic import AsyncAnthropicClient
-from ..settings import ModelSettings
+from ..settings import ModelSettings, merge_model_settings
 from ..tools import ToolDefinition
 from . import Model, ModelRequestParameters, StreamedResponse, check_allow_model_requests, download_item, get_user_agent
 
@@ -240,6 +240,27 @@ class AnthropicModel(Model):
         async with response:
             yield await self._process_streamed_response(response, model_request_parameters)
 
+    def prepare_request(
+        self, model_settings: ModelSettings | None, model_request_parameters: ModelRequestParameters
+    ) -> tuple[ModelSettings | None, ModelRequestParameters]:
+        settings = merge_model_settings(self.settings, model_settings)
+        if (
+            model_request_parameters.output_tools
+            and settings
+            and (thinking := settings.get('anthropic_thinking'))
+            and thinking.get('type') == 'enabled'
+        ):
+            if model_request_parameters.output_mode == 'auto':
+                model_request_parameters = replace(model_request_parameters, output_mode='prompted')
+            elif (
+                model_request_parameters.output_mode == 'tool' and not model_request_parameters.allow_text_output
+            ):  # pragma: no branch
+                # This would result in `tool_choice=required`, which Anthropic does not support with thinking.
+                raise UserError(
+                    'Anthropic does not support thinking and output tools at the same time. Use `output_type=PromptedOutput(...)` instead.'
+                )
+        return super().prepare_request(model_settings, model_request_parameters)
+
     @overload
     async def _messages_create(
         self,
@@ -278,17 +299,13 @@ class AnthropicModel(Model):
         else:
             if not model_request_parameters.allow_text_output:
                 tool_choice = {'type': 'any'}
-                if (thinking := model_settings.get('anthropic_thinking')) and thinking.get('type') == 'enabled':
-                    raise UserError(
-                        'Anthropic does not support thinking and output tools at the same time. Use `output_type=PromptedOutput(...)` instead.'
-                    )
             else:
                 tool_choice = {'type': 'auto'}
 
             if (allow_parallel_tool_calls := model_settings.get('parallel_tool_calls')) is not None:
                 tool_choice['disable_parallel_tool_use'] = not allow_parallel_tool_calls
 
-        system_prompt, anthropic_messages = await self._map_message(messages)
+        system_prompt, anthropic_messages = await self._map_message(messages, model_request_parameters)
 
         try:
             extra_headers = model_settings.get('extra_headers', {})
@@ -446,7 +463,9 @@ class AnthropicModel(Model):
         )
         return tools, mcp_servers, beta_features
 
-    async def _map_message(self, messages: list[ModelMessage]) -> tuple[str, list[BetaMessageParam]]:  # noqa: C901
+    async def _map_message(  # noqa: C901
+        self, messages: list[ModelMessage], model_request_parameters: ModelRequestParameters
+    ) -> tuple[str, list[BetaMessageParam]]:
         """Just maps a `pydantic_ai.Message` to a `anthropic.types.MessageParam`."""
         system_prompt_parts: list[str] = []
         anthropic_messages: list[BetaMessageParam] = []
@@ -615,7 +634,7 @@ class AnthropicModel(Model):
             anthropic_messages.append(BetaMessageParam(role='assistant', content=assistant_content_params))
         else:
             assert_never(m)
-        if instructions := self._get_instructions(messages):
+        if instructions := self._get_instructions(messages, model_request_parameters):
            system_prompt_parts.insert(0, instructions)
        system_prompt = '\n\n'.join(system_prompt_parts)
        return system_prompt, anthropic_messages
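
The thinking/output-tools conflict is now caught in `prepare_request`, and `'auto'` output mode falls back to prompted output instead of erroring. As the error message suggests, structured output alongside extended thinking should go through `PromptedOutput`; a sketch with an illustrative model and token budget:

```python
from pydantic import BaseModel

from pydantic_ai import Agent
from pydantic_ai.output import PromptedOutput


class Answer(BaseModel):
    value: int


agent = Agent(
    'anthropic:claude-sonnet-4-0',
    output_type=PromptedOutput(Answer),
    model_settings={'anthropic_thinking': {'type': 'enabled', 'budget_tokens': 2048}},
)
```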
pydantic_ai/models/bedrock.py CHANGED
@@ -374,7 +374,7 @@ class BedrockConverseModel(Model):
         model_settings: BedrockModelSettings | None,
         model_request_parameters: ModelRequestParameters,
     ) -> ConverseResponseTypeDef | ConverseStreamResponseTypeDef:
-        system_prompt, bedrock_messages = await self._map_messages(messages)
+        system_prompt, bedrock_messages = await self._map_messages(messages, model_request_parameters)
         inference_config = self._map_inference_config(model_settings)
 
         params: ConverseRequestTypeDef = {
@@ -450,7 +450,7 @@ class BedrockConverseModel(Model):
         return tool_config
 
     async def _map_messages(  # noqa: C901
-        self, messages: list[ModelMessage]
+        self, messages: list[ModelMessage], model_request_parameters: ModelRequestParameters
     ) -> tuple[list[SystemContentBlockTypeDef], list[MessageUnionTypeDef]]:
         """Maps a `pydantic_ai.Message` to the Bedrock `MessageUnionTypeDef`.
 
@@ -561,7 +561,7 @@ class BedrockConverseModel(Model):
             processed_messages.append(current_message)
             last_message = cast(dict[str, Any], current_message)
 
-        if instructions := self._get_instructions(messages):
+        if instructions := self._get_instructions(messages, model_request_parameters):
             system_prompt.insert(0, {'text': instructions})
 
         return system_prompt, processed_messages
pydantic_ai/models/cohere.py CHANGED
@@ -178,7 +178,7 @@ class CohereModel(Model):
         if model_request_parameters.builtin_tools:
             raise UserError('Cohere does not support built-in tools')
 
-        cohere_messages = self._map_messages(messages)
+        cohere_messages = self._map_messages(messages, model_request_parameters)
         try:
             return await self.client.chat(
                 model=self._model_name,
@@ -229,7 +229,9 @@ class CohereModel(Model):
             provider_details=provider_details,
         )
 
-    def _map_messages(self, messages: list[ModelMessage]) -> list[ChatMessageV2]:
+    def _map_messages(
+        self, messages: list[ModelMessage], model_request_parameters: ModelRequestParameters
+    ) -> list[ChatMessageV2]:
         """Just maps a `pydantic_ai.Message` to a `cohere.ChatMessageV2`."""
         cohere_messages: list[ChatMessageV2] = []
         for message in messages:
@@ -268,7 +270,7 @@ class CohereModel(Model):
                 cohere_messages.append(message_param)
             else:
                 assert_never(message)
-        if instructions := self._get_instructions(messages):
+        if instructions := self._get_instructions(messages, model_request_parameters):
             cohere_messages.insert(0, SystemChatMessageV2(role='system', content=instructions))
         return cohere_messages
 
pydantic_ai/models/fallback.py CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations as _annotations
 from collections.abc import AsyncIterator, Callable
 from contextlib import AsyncExitStack, asynccontextmanager, suppress
 from dataclasses import dataclass, field
+from functools import cached_property
 from typing import TYPE_CHECKING, Any
 
 from opentelemetry.trace import get_current_span
@@ -11,6 +12,7 @@ from pydantic_ai._run_context import RunContext
 from pydantic_ai.models.instrumented import InstrumentedModel
 
 from ..exceptions import FallbackExceptionGroup, ModelHTTPError
+from ..profiles import ModelProfile
 from . import KnownModelName, Model, ModelRequestParameters, StreamedResponse, infer_model
 
 if TYPE_CHECKING:
@@ -78,6 +80,7 @@ class FallbackModel(Model):
 
         for model in self.models:
             try:
+                _, prepared_parameters = model.prepare_request(model_settings, model_request_parameters)
                 response = await model.request(messages, model_settings, model_request_parameters)
             except Exception as exc:
                 if self._fallback_on(exc):
@@ -85,7 +88,7 @@ class FallbackModel(Model):
                     continue
                 raise exc
 
-            self._set_span_attributes(model)
+            self._set_span_attributes(model, prepared_parameters)
             return response
 
         raise FallbackExceptionGroup('All models from FallbackModel failed', exceptions)
@@ -104,6 +107,7 @@ class FallbackModel(Model):
         for model in self.models:
             async with AsyncExitStack() as stack:
                 try:
+                    _, prepared_parameters = model.prepare_request(model_settings, model_request_parameters)
                     response = await stack.enter_async_context(
                         model.request_stream(messages, model_settings, model_request_parameters, run_context)
                     )
@@ -113,19 +117,36 @@ class FallbackModel(Model):
                     continue
                 raise exc  # pragma: no cover
 
-                self._set_span_attributes(model)
+                self._set_span_attributes(model, prepared_parameters)
                 yield response
                 return
 
         raise FallbackExceptionGroup('All models from FallbackModel failed', exceptions)
 
-    def _set_span_attributes(self, model: Model):
+    @cached_property
+    def profile(self) -> ModelProfile:
+        raise NotImplementedError('FallbackModel does not have its own model profile.')
+
+    def customize_request_parameters(self, model_request_parameters: ModelRequestParameters) -> ModelRequestParameters:
+        return model_request_parameters  # pragma: no cover
+
+    def prepare_request(
+        self, model_settings: ModelSettings | None, model_request_parameters: ModelRequestParameters
+    ) -> tuple[ModelSettings | None, ModelRequestParameters]:
+        return model_settings, model_request_parameters
+
+    def _set_span_attributes(self, model: Model, model_request_parameters: ModelRequestParameters):
         with suppress(Exception):
             span = get_current_span()
             if span.is_recording():
                 attributes = getattr(span, 'attributes', {})
                 if attributes.get('gen_ai.request.model') == self.model_name:  # pragma: no branch
-                    span.set_attributes(InstrumentedModel.model_attributes(model))
+                    span.set_attributes(
+                        {
+                            **InstrumentedModel.model_attributes(model),
+                            **InstrumentedModel.model_request_parameters_attributes(model_request_parameters),
+                        }
+                    )
 
 
 def _default_fallback_condition_factory(exceptions: tuple[type[Exception], ...]) -> Callable[[Exception], bool]:
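
Since `FallbackModel` deliberately has no profile of its own, it now defers request preparation to each candidate model and records the winning model's prepared parameters on the active span. Usage is unchanged; a sketch with illustrative model names:

```python
from pydantic_ai import Agent
from pydantic_ai.models.fallback import FallbackModel

# Each candidate prepares its own request parameters; if the first model
# fails with a handled error, the next one is tried.
model = FallbackModel('openai:gpt-4.1', 'anthropic:claude-sonnet-4-0')
agent = Agent(model)
result = agent.run_sync('What is the capital of France?')
print(result.output)
```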