pydantic-ai-slim 0.2.19 (py3-none-any.whl) → 0.3.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pydantic-ai-slim might be problematic; see the registry's page for this release for more details.

@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations as _annotations
2
2
 
3
3
  import io
4
+ import warnings
4
5
  from collections.abc import AsyncGenerator, AsyncIterable, AsyncIterator
5
6
  from contextlib import asynccontextmanager
6
7
  from dataclasses import dataclass, field
@@ -23,6 +24,7 @@ from ..messages import (
23
24
  RetryPromptPart,
24
25
  SystemPromptPart,
25
26
  TextPart,
27
+ ThinkingPart,
26
28
  ToolCallPart,
27
29
  ToolReturnPart,
28
30
  UserPromptPart,
@@ -52,9 +54,15 @@ try:
52
54
  BetaRawMessageStartEvent,
53
55
  BetaRawMessageStopEvent,
54
56
  BetaRawMessageStreamEvent,
57
+ BetaRedactedThinkingBlock,
58
+ BetaSignatureDelta,
55
59
  BetaTextBlock,
56
60
  BetaTextBlockParam,
57
61
  BetaTextDelta,
62
+ BetaThinkingBlock,
63
+ BetaThinkingBlockParam,
64
+ BetaThinkingConfigParam,
65
+ BetaThinkingDelta,
58
66
  BetaToolChoiceParam,
59
67
  BetaToolParam,
60
68
  BetaToolResultBlockParam,
@@ -90,7 +98,14 @@ class AnthropicModelSettings(ModelSettings, total=False):
90
98
  anthropic_metadata: BetaMetadataParam
91
99
  """An object describing metadata about the request.
92
100
 
93
- Contains `user_id`, an external identifier for the user who is associated with the request."""
101
+ Contains `user_id`, an external identifier for the user who is associated with the request.
102
+ """
103
+
104
+ anthropic_thinking: BetaThinkingConfigParam
105
+ """Determine whether the model should generate a thinking block.
106
+
107
+ See [the Anthropic docs](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking) for more information.
108
+ """
94
109
 
95
110
 
96
111
  @dataclass(init=False)
@@ -227,6 +242,7 @@ class AnthropicModel(Model):
227
242
  tools=tools or NOT_GIVEN,
228
243
  tool_choice=tool_choice or NOT_GIVEN,
229
244
  stream=stream,
245
+ thinking=model_settings.get('anthropic_thinking', NOT_GIVEN),
230
246
  stop_sequences=model_settings.get('stop_sequences', NOT_GIVEN),
231
247
  temperature=model_settings.get('temperature', NOT_GIVEN),
232
248
  top_p=model_settings.get('top_p', NOT_GIVEN),
@@ -246,6 +262,14 @@ class AnthropicModel(Model):
246
262
  for item in response.content:
247
263
  if isinstance(item, BetaTextBlock):
248
264
  items.append(TextPart(content=item.text))
265
+ elif isinstance(item, BetaRedactedThinkingBlock): # pragma: no cover
266
+ warnings.warn(
267
+ 'PydanticAI currently does not handle redacted thinking blocks. '
268
+ 'If you have a suggestion on how we should handle them, please open an issue.',
269
+ UserWarning,
270
+ )
271
+ elif isinstance(item, BetaThinkingBlock):
272
+ items.append(ThinkingPart(content=item.thinking, signature=item.signature))
249
273
  else:
250
274
  assert isinstance(item, BetaToolUseBlock), f'unexpected item type {type(item)}'
251
275
  items.append(
@@ -312,11 +336,21 @@ class AnthropicModel(Model):
312
336
  if len(user_content_params) > 0:
313
337
  anthropic_messages.append(BetaMessageParam(role='user', content=user_content_params))
314
338
  elif isinstance(m, ModelResponse):
315
- assistant_content_params: list[BetaTextBlockParam | BetaToolUseBlockParam] = []
339
+ assistant_content_params: list[BetaTextBlockParam | BetaToolUseBlockParam | BetaThinkingBlockParam] = []
316
340
  for response_part in m.parts:
317
341
  if isinstance(response_part, TextPart):
318
342
  if response_part.content: # Only add non-empty text
319
343
  assistant_content_params.append(BetaTextBlockParam(text=response_part.content, type='text'))
344
+ elif isinstance(response_part, ThinkingPart):
345
+ # NOTE: We don't send ThinkingPart to the providers yet. If you are unsatisfied with this,
346
+ # please open an issue. The below code is the code to send thinking to the provider.
347
+ # assert response_part.signature is not None, 'Thinking part must have a signature'
348
+ # assistant_content_params.append(
349
+ # BetaThinkingBlockParam(
350
+ # thinking=response_part.content, signature=response_part.signature, type='thinking'
351
+ # )
352
+ # )
353
+ pass
320
354
  else:
321
355
  tool_use_block_param = BetaToolUseBlockParam(
322
356
  id=_guard_tool_call_id(t=response_part),
@@ -445,10 +479,14 @@ class AnthropicStreamedResponse(StreamedResponse):
445
479
  if isinstance(event, BetaRawContentBlockStartEvent):
446
480
  current_block = event.content_block
447
481
  if isinstance(current_block, BetaTextBlock) and current_block.text:
448
- yield self._parts_manager.handle_text_delta( # pragma: lax no cover
449
- vendor_part_id='content', content=current_block.text
482
+ yield self._parts_manager.handle_text_delta(vendor_part_id='content', content=current_block.text)
483
+ elif isinstance(current_block, BetaThinkingBlock):
484
+ yield self._parts_manager.handle_thinking_delta(
485
+ vendor_part_id='thinking',
486
+ content=current_block.thinking,
487
+ signature=current_block.signature,
450
488
  )
451
- elif isinstance(current_block, BetaToolUseBlock): # pragma: no branch
489
+ elif isinstance(current_block, BetaToolUseBlock):
452
490
  maybe_event = self._parts_manager.handle_tool_call_delta(
453
491
  vendor_part_id=current_block.id,
454
492
  tool_name=current_block.name,
@@ -460,14 +498,20 @@ class AnthropicStreamedResponse(StreamedResponse):
460
498
 
461
499
  elif isinstance(event, BetaRawContentBlockDeltaEvent):
462
500
  if isinstance(event.delta, BetaTextDelta):
463
- yield self._parts_manager.handle_text_delta( # pragma: no cover
464
- vendor_part_id='content', content=event.delta.text
501
+ yield self._parts_manager.handle_text_delta(vendor_part_id='content', content=event.delta.text)
502
+ elif isinstance(event.delta, BetaThinkingDelta):
503
+ yield self._parts_manager.handle_thinking_delta(
504
+ vendor_part_id='thinking', content=event.delta.thinking
505
+ )
506
+ elif isinstance(event.delta, BetaSignatureDelta):
507
+ yield self._parts_manager.handle_thinking_delta(
508
+ vendor_part_id='thinking', signature=event.delta.signature
465
509
  )
466
- elif ( # pragma: no branch
510
+ elif (
467
511
  current_block
468
512
  and event.delta.type == 'input_json_delta'
469
513
  and isinstance(current_block, BetaToolUseBlock)
470
- ):
514
+ ): # pragma: no branch
471
515
  maybe_event = self._parts_manager.handle_tool_call_delta(
472
516
  vendor_part_id=current_block.id,
473
517
  tool_name='',
@@ -2,6 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import functools
4
4
  import typing
5
+ import warnings
5
6
  from collections.abc import AsyncIterator, Iterable, Iterator, Mapping
6
7
  from contextlib import asynccontextmanager
7
8
  from dataclasses import dataclass, field
@@ -27,6 +28,7 @@ from pydantic_ai.messages import (
27
28
  RetryPromptPart,
28
29
  SystemPromptPart,
29
30
  TextPart,
31
+ ThinkingPart,
30
32
  ToolCallPart,
31
33
  ToolReturnPart,
32
34
  UserPromptPart,
@@ -265,11 +267,16 @@ class BedrockConverseModel(Model):
265
267
  items: list[ModelResponsePart] = []
266
268
  if message := response['output'].get('message'): # pragma: no branch
267
269
  for item in message['content']:
270
+ if reasoning_content := item.get('reasoningContent'):
271
+ reasoning_text = reasoning_content.get('reasoningText')
272
+ if reasoning_text: # pragma: no branch
273
+ thinking_part = ThinkingPart(content=reasoning_text['text'])
274
+ if reasoning_signature := reasoning_text.get('signature'):
275
+ thinking_part.signature = reasoning_signature
276
+ items.append(thinking_part)
268
277
  if text := item.get('text'):
269
278
  items.append(TextPart(content=text))
270
- else:
271
- tool_use = item.get('toolUse')
272
- assert tool_use is not None, f'Found a content that is not a text or tool use: {item}'
279
+ elif tool_use := item.get('toolUse'):
273
280
  items.append(
274
281
  ToolCallPart(
275
282
  tool_name=tool_use['name'],
@@ -385,7 +392,7 @@ class BedrockConverseModel(Model):
385
392
 
386
393
  return tool_config
387
394
 
388
- async def _map_messages(
395
+ async def _map_messages( # noqa: C901
389
396
  self, messages: list[ModelMessage]
390
397
  ) -> tuple[list[SystemContentBlockTypeDef], list[MessageUnionTypeDef]]:
391
398
  """Maps a `pydantic_ai.Message` to the Bedrock `MessageUnionTypeDef`.
@@ -448,6 +455,9 @@ class BedrockConverseModel(Model):
448
455
  for item in message.parts:
449
456
  if isinstance(item, TextPart):
450
457
  content.append({'text': item.content})
458
+ elif isinstance(item, ThinkingPart):
459
+ # NOTE: We don't pass the thinking part to Bedrock since it raises an error.
460
+ pass
451
461
  else:
452
462
  assert isinstance(item, ToolCallPart)
453
463
  content.append(self._map_tool_call(item))
@@ -592,6 +602,15 @@ class BedrockStreamedResponse(StreamedResponse):
592
602
  if 'contentBlockDelta' in chunk:
593
603
  index = chunk['contentBlockDelta']['contentBlockIndex']
594
604
  delta = chunk['contentBlockDelta']['delta']
605
+ if 'reasoningContent' in delta:
606
+ if text := delta['reasoningContent'].get('text'):
607
+ yield self._parts_manager.handle_thinking_delta(vendor_part_id=index, content=text)
608
+ else: # pragma: no cover
609
+ warnings.warn(
610
+ f'Only text reasoning content is supported yet, but you got {delta["reasoningContent"]}. '
611
+ 'Please report this to the maintainers.',
612
+ UserWarning,
613
+ )
595
614
  if 'text' in delta:
596
615
  yield self._parts_manager.handle_text_delta(vendor_part_id=index, content=delta['text'])
597
616
  if 'toolUse' in delta:
@@ -6,6 +6,8 @@ from typing import Literal, Union, cast
6
6
 
7
7
  from typing_extensions import assert_never
8
8
 
9
+ from pydantic_ai._thinking_part import split_content_into_text_and_thinking
10
+
9
11
  from .. import ModelHTTPError, usage
10
12
  from .._utils import generate_tool_call_id as _generate_tool_call_id, guard_tool_call_id as _guard_tool_call_id
11
13
  from ..messages import (
@@ -16,6 +18,7 @@ from ..messages import (
16
18
  RetryPromptPart,
17
19
  SystemPromptPart,
18
20
  TextPart,
21
+ ThinkingPart,
19
22
  ToolCallPart,
20
23
  ToolReturnPart,
21
24
  UserPromptPart,
@@ -187,7 +190,7 @@ class CohereModel(Model):
187
190
  # While Cohere's API returns a list, it only does that for future proofing
188
191
  # and currently only one item is being returned.
189
192
  choice = response.message.content[0]
190
- parts.append(TextPart(choice.text))
193
+ parts.extend(split_content_into_text_and_thinking(choice.text))
191
194
  for c in response.message.tool_calls or []:
192
195
  if c.function and c.function.name and c.function.arguments: # pragma: no branch
193
196
  parts.append(
@@ -211,6 +214,11 @@ class CohereModel(Model):
211
214
  for item in message.parts:
212
215
  if isinstance(item, TextPart):
213
216
  texts.append(item.content)
217
+ elif isinstance(item, ThinkingPart):
218
+ # NOTE: We don't send ThinkingPart to the providers yet. If you are unsatisfied with this,
219
+ # please open an issue. The below code is the code to send thinking to the provider.
220
+ # texts.append(f'<think>\n{item.content}\n</think>')
221
+ pass
214
222
  elif isinstance(item, ToolCallPart):
215
223
  tool_calls.append(self._map_tool_call(item))
216
224
  else:
@@ -24,6 +24,7 @@ from ..messages import (
24
24
  RetryPromptPart,
25
25
  SystemPromptPart,
26
26
  TextPart,
27
+ ThinkingPart,
27
28
  ToolCallPart,
28
29
  ToolReturnPart,
29
30
  UserContent,
@@ -268,6 +269,10 @@ def _estimate_usage(messages: Iterable[ModelMessage]) -> usage.Usage:
268
269
  for part in message.parts:
269
270
  if isinstance(part, TextPart):
270
271
  response_tokens += _estimate_string_tokens(part.content)
272
+ elif isinstance(part, ThinkingPart):
273
+ # NOTE: We don't send ThinkingPart to the providers yet.
274
+ # If you are unsatisfied with this, please open an issue.
275
+ pass
271
276
  elif isinstance(part, ToolCallPart):
272
277
  call = part
273
278
  response_tokens += 1 + _estimate_string_tokens(call.args_as_json_str())
@@ -27,6 +27,7 @@ from ..messages import (
27
27
  RetryPromptPart,
28
28
  SystemPromptPart,
29
29
  TextPart,
30
+ ThinkingPart,
30
31
  ToolCallPart,
31
32
  ToolReturnPart,
32
33
  UserPromptPart,
@@ -94,6 +95,15 @@ class GeminiModelSettings(ModelSettings, total=False):
94
95
  See the [Gemini API docs](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/add-labels-to-api-calls) for use cases and limitations.
95
96
  """
96
97
 
98
+ gemini_thinking_config: ThinkingConfig
99
+ """Thinking is on by default in both the API and AI Studio.
100
+
101
+ Being on by default doesn't mean the model will send back thoughts. For that, you need to set `include_thoughts`
102
+ to `True`. If you want to turn it off, set `thinking_budget` to `0`.
103
+
104
+ See more about it on <https://ai.google.dev/gemini-api/docs/thinking>.
105
+ """
106
+
97
107
 
98
108
  @dataclass(init=False)
99
109
  class GeminiModel(Model):
@@ -379,7 +389,7 @@ def _settings_to_generation_config(model_settings: GeminiModelSettings) -> _Gemi
379
389
  if (frequency_penalty := model_settings.get('frequency_penalty')) is not None:
380
390
  config['frequency_penalty'] = frequency_penalty
381
391
  if (thinkingConfig := model_settings.get('gemini_thinking_config')) is not None:
382
- config['thinking_config'] = thinkingConfig # pragma: no cover
392
+ config['thinking_config'] = thinkingConfig # pragma: lax no cover
383
393
  return config
384
394
 
385
395
 
@@ -576,6 +586,11 @@ def _content_model_response(m: ModelResponse) -> _GeminiContent:
576
586
  for item in m.parts:
577
587
  if isinstance(item, ToolCallPart):
578
588
  parts.append(_function_call_part_from_call(item))
589
+ elif isinstance(item, ThinkingPart):
590
+ # NOTE: We don't send ThinkingPart to the providers yet. If you are unsatisfied with this,
591
+ # please open an issue. The below code is the code to send thinking to the provider.
592
+ # parts.append(_GeminiTextPart(text=item.content, thought=True))
593
+ pass
579
594
  elif isinstance(item, TextPart):
580
595
  if item.content:
581
596
  parts.append(_GeminiTextPart(text=item.content))
@@ -584,29 +599,34 @@ def _content_model_response(m: ModelResponse) -> _GeminiContent:
584
599
  return _GeminiContent(role='model', parts=parts)
585
600
 
586
601
 
587
- class _GeminiTextPart(TypedDict):
602
+ class _BasePart(TypedDict):
603
+ thought: NotRequired[bool]
604
+ """Indicates if the part is thought from the model."""
605
+
606
+
607
+ class _GeminiTextPart(_BasePart):
588
608
  text: str
589
609
 
590
610
 
591
- class _GeminiInlineData(TypedDict):
611
+ class _GeminiInlineData(_BasePart):
592
612
  data: str
593
613
  mime_type: Annotated[str, pydantic.Field(alias='mimeType')]
594
614
 
595
615
 
596
- class _GeminiInlineDataPart(TypedDict):
616
+ class _GeminiInlineDataPart(_BasePart):
597
617
  """See <https://ai.google.dev/api/caching#Blob>."""
598
618
 
599
619
  inline_data: Annotated[_GeminiInlineData, pydantic.Field(alias='inlineData')]
600
620
 
601
621
 
602
- class _GeminiFileData(TypedDict):
622
+ class _GeminiFileData(_BasePart):
603
623
  """See <https://ai.google.dev/api/caching#FileData>."""
604
624
 
605
625
  file_uri: Annotated[str, pydantic.Field(alias='fileUri')]
606
626
  mime_type: Annotated[str, pydantic.Field(alias='mimeType')]
607
627
 
608
628
 
609
- class _GeminiFileDataPart(TypedDict):
629
+ class _GeminiFileDataPart(_BasePart):
610
630
  file_data: Annotated[_GeminiFileData, pydantic.Field(alias='fileData')]
611
631
 
612
632
 
@@ -615,7 +635,7 @@ class _GeminiThoughtPart(TypedDict):
615
635
  thought_signature: Annotated[str, pydantic.Field(alias='thoughtSignature')]
616
636
 
617
637
 
618
- class _GeminiFunctionCallPart(TypedDict):
638
+ class _GeminiFunctionCallPart(_BasePart):
619
639
  function_call: Annotated[_GeminiFunctionCall, pydantic.Field(alias='functionCall')]
620
640
 
621
641
 
@@ -633,7 +653,12 @@ def _process_response_from_parts(
633
653
  items: list[ModelResponsePart] = []
634
654
  for part in parts:
635
655
  if 'text' in part:
636
- items.append(TextPart(content=part['text']))
656
+ # NOTE: Google doesn't include the `thought` field anymore. We handle this here in case they decide to
657
+ # change their mind and start including it again.
658
+ if part.get('thought'): # pragma: no cover
659
+ items.append(ThinkingPart(content=part['text']))
660
+ else:
661
+ items.append(TextPart(content=part['text']))
637
662
  elif 'function_call' in part:
638
663
  items.append(ToolCallPart(tool_name=part['function_call']['name'], args=part['function_call']['args']))
639
664
  elif 'function_response' in part: # pragma: no cover
@@ -10,9 +10,8 @@ from uuid import uuid4
10
10
 
11
11
  from typing_extensions import assert_never
12
12
 
13
- from pydantic_ai.providers import Provider
14
-
15
13
  from .. import UnexpectedModelBehavior, _utils, usage
14
+ from ..exceptions import UserError
16
15
  from ..messages import (
17
16
  BinaryContent,
18
17
  FileUrl,
@@ -24,12 +23,14 @@ from ..messages import (
24
23
  RetryPromptPart,
25
24
  SystemPromptPart,
26
25
  TextPart,
26
+ ThinkingPart,
27
27
  ToolCallPart,
28
28
  ToolReturnPart,
29
29
  UserPromptPart,
30
30
  VideoUrl,
31
31
  )
32
32
  from ..profiles import ModelProfileSpec
33
+ from ..providers import Provider
33
34
  from ..settings import ModelSettings
34
35
  from ..tools import ToolDefinition
35
36
  from . import (
@@ -52,6 +53,7 @@ try:
52
53
  FunctionDeclarationDict,
53
54
  GenerateContentConfigDict,
54
55
  GenerateContentResponse,
56
+ HttpOptionsDict,
55
57
  Part,
56
58
  PartDict,
57
59
  SafetySettingDict,
@@ -252,8 +254,17 @@ class GoogleModel(Model):
252
254
  tool_config = self._get_tool_config(model_request_parameters, tools)
253
255
  system_instruction, contents = await self._map_messages(messages)
254
256
 
257
+ http_options: HttpOptionsDict = {
258
+ 'headers': {'Content-Type': 'application/json', 'User-Agent': get_user_agent()}
259
+ }
260
+ if timeout := model_settings.get('timeout'):
261
+ if isinstance(timeout, (int, float)):
262
+ http_options['timeout'] = int(1000 * timeout)
263
+ else:
264
+ raise UserError('Google does not support setting ModelSettings.timeout to a httpx.Timeout')
265
+
255
266
  config = GenerateContentConfigDict(
256
- http_options={'headers': {'Content-Type': 'application/json', 'User-Agent': get_user_agent()}},
267
+ http_options=http_options,
257
268
  system_instruction=system_instruction,
258
269
  temperature=model_settings.get('temperature'),
259
270
  top_p=model_settings.get('top_p'),
@@ -403,7 +414,10 @@ class GeminiStreamedResponse(StreamedResponse):
403
414
  assert candidate.content.parts is not None
404
415
  for part in candidate.content.parts:
405
416
  if part.text is not None:
406
- yield self._parts_manager.handle_text_delta(vendor_part_id='content', content=part.text)
417
+ if part.thought:
418
+ yield self._parts_manager.handle_thinking_delta(vendor_part_id='thinking', content=part.text)
419
+ else:
420
+ yield self._parts_manager.handle_text_delta(vendor_part_id='content', content=part.text)
407
421
  elif part.function_call:
408
422
  maybe_event = self._parts_manager.handle_tool_call_delta(
409
423
  vendor_part_id=uuid4(),
@@ -436,6 +450,11 @@ def _content_model_response(m: ModelResponse) -> ContentDict:
436
450
  elif isinstance(item, TextPart):
437
451
  if item.content: # pragma: no branch
438
452
  parts.append({'text': item.content})
453
+ elif isinstance(item, ThinkingPart): # pragma: no cover
454
+ # NOTE: We don't send ThinkingPart to the providers yet. If you are unsatisfied with this,
455
+ # please open an issue. The below code is the code to send thinking to the provider.
456
+ # parts.append({'text': item.content, 'thought': True})
457
+ pass
439
458
  else:
440
459
  assert_never(item)
441
460
  return ContentDict(role='model', parts=parts)
@@ -451,7 +470,10 @@ def _process_response_from_parts(
451
470
  items: list[ModelResponsePart] = []
452
471
  for part in parts:
453
472
  if part.text is not None:
454
- items.append(TextPart(content=part.text))
473
+ if part.thought:
474
+ items.append(ThinkingPart(content=part.text))
475
+ else:
476
+ items.append(TextPart(content=part.text))
455
477
  elif part.function_call:
456
478
  assert part.function_call.name is not None
457
479
  tool_call_part = ToolCallPart(tool_name=part.function_call.name, args=part.function_call.args)
@@ -9,6 +9,8 @@ from typing import Literal, Union, cast, overload
9
9
 
10
10
  from typing_extensions import assert_never
11
11
 
12
+ from pydantic_ai._thinking_part import split_content_into_text_and_thinking
13
+
12
14
  from .. import ModelHTTPError, UnexpectedModelBehavior, _utils, usage
13
15
  from .._utils import guard_tool_call_id as _guard_tool_call_id, number_to_datetime
14
16
  from ..messages import (
@@ -23,6 +25,7 @@ from ..messages import (
23
25
  RetryPromptPart,
24
26
  SystemPromptPart,
25
27
  TextPart,
28
+ ThinkingPart,
26
29
  ToolCallPart,
27
30
  ToolReturnPart,
28
31
  UserPromptPart,
@@ -95,7 +98,7 @@ class GroqModelSettings(ModelSettings, total=False):
95
98
  ALL FIELDS MUST BE `groq_` PREFIXED SO YOU CAN MERGE THEM WITH OTHER MODELS.
96
99
  """
97
100
 
98
- # This class is a placeholder for any future groq-specific settings
101
+ groq_reasoning_format: Literal['hidden', 'raw', 'parsed']
99
102
 
100
103
 
101
104
  @dataclass(init=False)
@@ -234,6 +237,7 @@ class GroqModel(Model):
234
237
  timeout=model_settings.get('timeout', NOT_GIVEN),
235
238
  seed=model_settings.get('seed', NOT_GIVEN),
236
239
  presence_penalty=model_settings.get('presence_penalty', NOT_GIVEN),
240
+ reasoning_format=model_settings.get('groq_reasoning_format', NOT_GIVEN),
237
241
  frequency_penalty=model_settings.get('frequency_penalty', NOT_GIVEN),
238
242
  logit_bias=model_settings.get('logit_bias', NOT_GIVEN),
239
243
  extra_headers=extra_headers,
@@ -249,8 +253,12 @@ class GroqModel(Model):
249
253
  timestamp = number_to_datetime(response.created)
250
254
  choice = response.choices[0]
251
255
  items: list[ModelResponsePart] = []
256
+ # NOTE: The `reasoning` field is only present if `groq_reasoning_format` is set to `parsed`.
257
+ if choice.message.reasoning is not None:
258
+ items.append(ThinkingPart(content=choice.message.reasoning))
252
259
  if choice.message.content is not None:
253
- items.append(TextPart(content=choice.message.content))
260
+ # NOTE: The `<think>` tag is only present if `groq_reasoning_format` is set to `raw`.
261
+ items.extend(split_content_into_text_and_thinking(choice.message.content))
254
262
  if choice.message.tool_calls is not None:
255
263
  for c in choice.message.tool_calls:
256
264
  items.append(ToolCallPart(tool_name=c.function.name, args=c.function.arguments, tool_call_id=c.id))
@@ -293,6 +301,9 @@ class GroqModel(Model):
293
301
  texts.append(item.content)
294
302
  elif isinstance(item, ToolCallPart):
295
303
  tool_calls.append(self._map_tool_call(item))
304
+ elif isinstance(item, ThinkingPart):
305
+ # Skip thinking parts when mapping to Groq messages
306
+ continue
296
307
  else:
297
308
  assert_never(item)
298
309
  message_param = chat.ChatCompletionAssistantMessageParam(role='assistant')
@@ -134,7 +134,7 @@ class InstrumentationSettings:
134
134
  **tokens_histogram_kwargs,
135
135
  explicit_bucket_boundaries_advisory=TOKEN_HISTOGRAM_BOUNDARIES,
136
136
  )
137
- except TypeError:
137
+ except TypeError: # pragma: lax no cover
138
138
  # Older OTel/logfire versions don't support explicit_bucket_boundaries_advisory
139
139
  self.tokens_histogram = self.meter.create_histogram(
140
140
  **tokens_histogram_kwargs, # pyright: ignore
@@ -11,6 +11,8 @@ import pydantic_core
11
11
  from httpx import Timeout
12
12
  from typing_extensions import assert_never
13
13
 
14
+ from pydantic_ai._thinking_part import split_content_into_text_and_thinking
15
+
14
16
  from .. import ModelHTTPError, UnexpectedModelBehavior, _utils
15
17
  from .._utils import generate_tool_call_id as _generate_tool_call_id, now_utc as _now_utc, number_to_datetime
16
18
  from ..messages import (
@@ -25,6 +27,7 @@ from ..messages import (
25
27
  RetryPromptPart,
26
28
  SystemPromptPart,
27
29
  TextPart,
30
+ ThinkingPart,
28
31
  ToolCallPart,
29
32
  ToolReturnPart,
30
33
  UserPromptPart,
@@ -322,7 +325,7 @@ class MistralModel(Model):
322
325
 
323
326
  parts: list[ModelResponsePart] = []
324
327
  if text := _map_content(content):
325
- parts.append(TextPart(content=text))
328
+ parts.extend(split_content_into_text_and_thinking(text))
326
329
 
327
330
  if isinstance(tool_calls, list):
328
331
  for tool_call in tool_calls:
@@ -484,6 +487,11 @@ class MistralModel(Model):
484
487
  for part in message.parts:
485
488
  if isinstance(part, TextPart):
486
489
  content_chunks.append(MistralTextChunk(text=part.content))
490
+ elif isinstance(part, ThinkingPart):
491
+ # NOTE: We don't send ThinkingPart to the providers yet. If you are unsatisfied with this,
492
+ # please open an issue. The below code is the code to send thinking to the provider.
493
+ # content_chunks.append(MistralTextChunk(text=f'<think>{part.content}</think>'))
494
+ pass
487
495
  elif isinstance(part, ToolCallPart):
488
496
  tool_calls.append(self._map_tool_call(part))
489
497
  else: