pydantic-ai 0.2.11__tar.gz → 0.2.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pydantic-ai might be problematic.
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/PKG-INFO +3 -3
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_evaluator_common.py +82 -2
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_llm_as_a_judge.py +113 -0
- pydantic_ai-0.2.13/tests/models/cassettes/test_openai/test_compatible_api_with_tool_calls_without_id.yaml +159 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_bedrock.py +95 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_gemini.py +81 -11
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_google.py +60 -8
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_openai.py +147 -16
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_openai_responses.py +43 -0
- pydantic_ai-0.2.13/tests/providers/test_azure.py +141 -0
- pydantic_ai-0.2.13/tests/providers/test_bedrock.py +95 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/providers/test_deepseek.py +8 -1
- pydantic_ai-0.2.13/tests/providers/test_fireworks.py +104 -0
- pydantic_ai-0.2.13/tests/providers/test_grok.py +57 -0
- pydantic_ai-0.2.13/tests/providers/test_groq.py +107 -0
- pydantic_ai-0.2.13/tests/providers/test_openrouter.py +154 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/providers/test_provider_names.py +6 -0
- pydantic_ai-0.2.13/tests/providers/test_together.py +100 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_agent.py +645 -3
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_examples.py +94 -1
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_tools.py +1 -1
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/typed_agent.py +50 -22
- pydantic_ai-0.2.11/tests/providers/test_azure.py +0 -72
- pydantic_ai-0.2.11/tests/providers/test_bedrock.py +0 -34
- pydantic_ai-0.2.11/tests/providers/test_groq.py +0 -57
- pydantic_ai-0.2.11/tests/providers/test_openrouter.py +0 -67
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/.gitignore +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/LICENSE +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/Makefile +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/README.md +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/pyproject.toml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/__init__.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/assets/dummy.pdf +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/assets/kiwi.png +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/assets/marcelo.mp3 +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/assets/small_video.mp4 +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_mcp/test_agent_with_stdio_server.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_mcp/test_tool_returning_dict.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_mcp/test_tool_returning_error.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_mcp/test_tool_returning_image.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_mcp/test_tool_returning_image_resource.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_mcp/test_tool_returning_multiple_items.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_mcp/test_tool_returning_none.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_mcp/test_tool_returning_str.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_mcp/test_tool_returning_text_resource.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_settings/test_stop_settings[anthropic].yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_settings/test_stop_settings[bedrock].yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_settings/test_stop_settings[cohere].yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_settings/test_stop_settings[gemini].yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_settings/test_stop_settings[groq].yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_settings/test_stop_settings[mistral].yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_settings/test_stop_settings[openai].yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/conftest.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/__init__.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_dataset.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_evaluator_base.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_evaluator_context.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_evaluator_spec.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_evaluators.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_otel.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_render_numbers.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_reporting.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_reports.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_utils.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/utils.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/example_modules/README.md +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/example_modules/bank_database.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/example_modules/fake_database.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/example_modules/weather_service.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/fasta2a/__init__.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/fasta2a/test_applications.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/graph/__init__.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/graph/test_file_persistence.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/graph/test_graph.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/graph/test_mermaid.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/graph/test_persistence.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/graph/test_state.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/graph/test_utils.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/import_examples.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/json_body_serializer.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/mcp_server.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/__init__.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_anthropic/test_anthropic_model_empty_message_on_history.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_anthropic/test_anthropic_model_instructions.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_anthropic/test_document_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_anthropic/test_document_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_anthropic/test_extra_headers.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_anthropic/test_image_as_binary_content_tool_response.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_anthropic/test_image_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_anthropic/test_image_url_input_invalid_mime_type.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_anthropic/test_multiple_parallel_tool_calls.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_anthropic/test_text_document_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_empty_system_prompt.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_model.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_model_anthropic_model_without_tools.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_model_guardrail_config.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_model_instructions.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_model_iter_stream.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_model_max_tokens.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_model_other_parameters.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_model_performance_config.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_model_retry.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_model_stream.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_model_structured_response.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_model_top_p.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_multiple_documents_in_history.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_document_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_image_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_text_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_text_document_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_video_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_video_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_cohere/test_cohere_model_instructions.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_cohere/test_request_simple_success_with_vcr.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_gemini/test_document_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_gemini/test_gemini_additional_properties_is_false.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_gemini/test_gemini_additional_properties_is_true.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_gemini/test_gemini_drop_exclusive_maximum.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_gemini/test_gemini_exclusive_minimum_and_maximum.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_gemini/test_gemini_model_instructions.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_gemini/test_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_gemini/test_image_as_binary_content_tool_response.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_gemini/test_image_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_gemini/test_labels_are_ignored_with_gla_provider.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_gemini/test_video_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_gemini/test_video_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_gemini_vertexai/test_labels.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_document_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_image_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_instructions.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_iter_stream.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_max_tokens.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_multiple_documents_in_history.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_retry.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_safety_settings.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_stream.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_structured_response.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_text_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_text_document_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_thinking_config.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_top_p.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_vertex_labels.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_vertex_provider.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_video_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_video_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_groq/test_extra_headers.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_groq/test_groq_model_instructions.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_groq/test_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_groq/test_image_as_binary_content_tool_response.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_groq/test_image_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_mistral/test_image_as_binary_content_tool_response.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_mistral/test_mistral_model_instructions.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_audio_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_document_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_document_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_extra_headers.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_image_as_binary_content_tool_response.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_image_url_tool_response.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_max_completion_tokens[gpt-4.5-preview].yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_max_completion_tokens[gpt-4o-mini].yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_max_completion_tokens[o3-mini].yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_multiple_agent_tool_calls.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_openai_audio_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_openai_instructions.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_openai_instructions_with_tool_calls_keep_instructions.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_openai_model_without_system_prompt.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_openai_o1_mini_system_role[developer].yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_openai_o1_mini_system_role[system].yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_user_id.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_audio_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_image_as_binary_content_tool_response.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_document_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_document_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_image_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_builtin_tools.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_http_error.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_instructions.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_retry.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_simple_response.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_simple_response_with_tool_call.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_output_type.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_reasoning_effort.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_reasoning_generate_summary.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_stream.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_system_prompt.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_text_document_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/mock_async_stream.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_anthropic.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_cohere.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_fallback.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_gemini_vertexai.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_groq.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_instrumented.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_mistral.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_model.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_model_function.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_model_names.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_model_request_parameters.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_model_test.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/providers/__init__.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/providers/cassettes/test_azure/test_azure_provider_call.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/providers/cassettes/test_google_vertex/test_vertexai_provider.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/providers/cassettes/test_openrouter/test_openrouter_with_google_model.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/providers/test_anthropic.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/providers/test_cohere.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/providers/test_google_gla.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/providers/test_google_vertex.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/providers/test_mistral.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/providers/test_openai.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_a2a.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_cli.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_deps.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_direct.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_format_as_xml.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_json_body_serializer.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_live.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_logfire.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_mcp.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_messages.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_parts_manager.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_settings.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_streaming.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_usage_limits.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_utils.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/typed_graph.py +0 -0
{pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pydantic-ai
-Version: 0.2.11
+Version: 0.2.13
 Summary: Agent Framework / shim to use Pydantic with LLMs
 Project-URL: Homepage, https://ai.pydantic.dev
 Project-URL: Source, https://github.com/pydantic/pydantic-ai
@@ -28,9 +28,9 @@ Classifier: Topic :: Internet
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.9
-Requires-Dist: pydantic-ai-slim[a2a,anthropic,bedrock,cli,cohere,evals,google,groq,mcp,mistral,openai,vertexai]==0.2.11
+Requires-Dist: pydantic-ai-slim[a2a,anthropic,bedrock,cli,cohere,evals,google,groq,mcp,mistral,openai,vertexai]==0.2.13
 Provides-Extra: examples
-Requires-Dist: pydantic-ai-examples==0.2.11; extra == 'examples'
+Requires-Dist: pydantic-ai-examples==0.2.13; extra == 'examples'
 Provides-Extra: logfire
 Requires-Dist: logfire>=3.11.0; extra == 'logfire'
 Description-Content-Type: text/markdown
```
{pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_evaluator_common.py

```diff
@@ -208,11 +208,21 @@ async def test_llm_judge_evaluator(mocker: MockerFixture):
     mock_judge_input_output = mocker.patch('pydantic_evals.evaluators.llm_as_a_judge.judge_input_output')
     mock_judge_input_output.return_value = mock_grading_output
 
+    # Mock the judge_input_output_expected function
+    mock_judge_input_output_expected = mocker.patch(
+        'pydantic_evals.evaluators.llm_as_a_judge.judge_input_output_expected'
+    )
+    mock_judge_input_output_expected.return_value = mock_grading_output
+
+    # Mock the judge_output_expected function
+    mock_judge_output_expected = mocker.patch('pydantic_evals.evaluators.llm_as_a_judge.judge_output_expected')
+    mock_judge_output_expected.return_value = mock_grading_output
+
     ctx = EvaluatorContext(
         name='test',
         inputs={'prompt': 'Hello'},
         metadata=None,
-        expected_output=None,
+        expected_output='Hello',
         output='Hello world',
         duration=0.0,
         _span_tree=SpanTreeRecordingError('spans were not recorded'),
@@ -238,6 +248,29 @@ async def test_llm_judge_evaluator(mocker: MockerFixture):
         {'prompt': 'Hello'}, 'Hello world', 'Output contains input', 'openai:gpt-4o', None
     )
 
+    # Test with input and expected output
+    evaluator = LLMJudge(
+        rubric='Output contains input', include_input=True, include_expected_output=True, model='openai:gpt-4o'
+    )
+    assert to_jsonable_python(await evaluator.evaluate(ctx)) == snapshot(
+        {'LLMJudge': {'value': True, 'reason': 'Test passed'}}
+    )
+
+    mock_judge_input_output_expected.assert_called_once_with(
+        {'prompt': 'Hello'}, 'Hello world', 'Hello', 'Output contains input', 'openai:gpt-4o', None
+    )
+
+    # Test with output and expected output
+    evaluator = LLMJudge(
+        rubric='Output contains input', include_input=False, include_expected_output=True, model='openai:gpt-4o'
+    )
+    assert to_jsonable_python(await evaluator.evaluate(ctx)) == snapshot(
+        {'LLMJudge': {'value': True, 'reason': 'Test passed'}}
+    )
+
+    mock_judge_output_expected.assert_called_once_with(
+        'Hello world', 'Hello', 'Output contains input', 'openai:gpt-4o', None
+    )
     # Test with failing result
     mock_grading_output.score = 0.0
     mock_grading_output.pass_ = False
@@ -273,13 +306,21 @@ async def test_llm_judge_evaluator_with_model_settings(mocker: MockerFixture):
     mock_judge_input_output = mocker.patch('pydantic_evals.evaluators.llm_as_a_judge.judge_input_output')
     mock_judge_input_output.return_value = mock_grading_output
 
+    mock_judge_input_output_expected = mocker.patch(
+        'pydantic_evals.evaluators.llm_as_a_judge.judge_input_output_expected'
+    )
+    mock_judge_input_output_expected.return_value = mock_grading_output
+
+    mock_judge_output_expected = mocker.patch('pydantic_evals.evaluators.llm_as_a_judge.judge_output_expected')
+    mock_judge_output_expected.return_value = mock_grading_output
+
     custom_model_settings = ModelSettings(temperature=0.77)
 
     ctx = EvaluatorContext(
         name='test_custom_settings',
         inputs={'prompt': 'Hello Custom'},
         metadata=None,
-        expected_output=None,
+        expected_output='Hello',
         output='Hello world custom settings',
         duration=0.0,
         _span_tree=SpanTreeRecordingError('spans were not recorded'),
@@ -314,6 +355,45 @@ async def test_llm_judge_evaluator_with_model_settings(mocker: MockerFixture):
         custom_model_settings,
     )
 
+    # Test with input and expected output, with custom model_settings
+    evaluator_with_input_expected = LLMJudge(
+        rubric='Output contains input with custom settings',
+        include_input=True,
+        include_expected_output=True,
+        model='openai:gpt-3.5-turbo',
+        model_settings=custom_model_settings,
+    )
+    assert to_jsonable_python(await evaluator_with_input_expected.evaluate(ctx)) == snapshot(
+        {'LLMJudge': {'value': True, 'reason': 'Test passed with settings'}}
+    )
+    mock_judge_input_output_expected.assert_called_once_with(
+        {'prompt': 'Hello Custom'},
+        'Hello world custom settings',
+        'Hello',
+        'Output contains input with custom settings',
+        'openai:gpt-3.5-turbo',
+        custom_model_settings,
+    )
+
+    # Test with output and expected output
+    evaluator_with_output_expected = LLMJudge(
+        rubric='Output contains input with custom settings',
+        include_input=False,
+        include_expected_output=True,
+        model='openai:gpt-3.5-turbo',
+        model_settings=custom_model_settings,
+    )
+    assert to_jsonable_python(await evaluator_with_output_expected.evaluate(ctx)) == snapshot(
+        {'LLMJudge': {'value': True, 'reason': 'Test passed with settings'}}
+    )
+    mock_judge_output_expected.assert_called_once_with(
+        'Hello world custom settings',
+        'Hello',
+        'Output contains input with custom settings',
+        'openai:gpt-3.5-turbo',
+        custom_model_settings,
+    )
+
 
 async def test_python():
     """Test Python evaluator."""
```
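The tests above exercise the new `include_expected_output` option on `LLMJudge`, which passes a case's expected output through to the judge model alongside the input and output. A minimal usage sketch (not part of this diff — the `Case`/`Dataset` wiring and rubric text here are illustrative):

```python
from pydantic_evals import Case, Dataset
from pydantic_evals.evaluators import LLMJudge

# A single case with an expected output; the judge model sees it because
# include_expected_output=True (the flag added in this release).
dataset = Dataset(
    cases=[Case(name='greeting', inputs={'prompt': 'Hello'}, expected_output='Hello world')],
    evaluators=[
        LLMJudge(
            rubric='Output matches the expected output in meaning',
            include_input=True,
            include_expected_output=True,
            model='openai:gpt-4o',
        )
    ],
)
```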
{pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_llm_as_a_judge.py

```diff
@@ -11,7 +11,9 @@ with try_import() as imports_successful:
         GradingOutput,
         _stringify,  # pyright: ignore[reportPrivateUsage]
         judge_input_output,
+        judge_input_output_expected,
         judge_output,
+        judge_output_expected,
     )
 
 pytestmark = [pytest.mark.skipif(not imports_successful(), reason='pydantic-evals not installed'), pytest.mark.anyio]
@@ -167,3 +169,114 @@ async def test_judge_input_output_with_model_settings_mock(mocker: MockerFixture):
     assert call_kwargs['model_settings'] == test_model_settings
     # Check if 'model' kwarg is passed, its value will be the default model or None
     assert 'model' in call_kwargs
+
+
+@pytest.mark.anyio
+async def test_judge_input_output_expected_mock(mocker: MockerFixture):
+    """Test judge_input_output_expected function with mocked agent."""
+    # Mock the agent run method
+    mock_result = mocker.MagicMock()
+    mock_result.output = GradingOutput(reason='Test passed', pass_=True, score=1.0)
+    mock_run = mocker.patch('pydantic_ai.Agent.run', return_value=mock_result)
+
+    # Test with string input and output
+    result = await judge_input_output_expected('Hello', 'Hello world', 'Hello', 'Output contains input')
+    assert isinstance(result, GradingOutput)
+    assert result.reason == 'Test passed'
+    assert result.pass_ is True
+    assert result.score == 1.0
+
+    # Verify the agent was called with correct prompt
+    mock_run.assert_called_once()
+    call_args = mock_run.call_args[0]
+    assert '<Input>\nHello\n</Input>' in call_args[0]
+    assert '<ExpectedOutput>\nHello\n</ExpectedOutput>' in call_args[0]
+    assert '<Output>\nHello world\n</Output>' in call_args[0]
+    assert '<Rubric>\nOutput contains input\n</Rubric>' in call_args[0]
+
+
+@pytest.mark.anyio
+async def test_judge_input_output_expected_with_model_settings_mock(mocker: MockerFixture):
+    """Test judge_input_output_expected function with model_settings and mocked agent."""
+    mock_result = mocker.MagicMock()
+    mock_result.output = GradingOutput(reason='Test passed with settings', pass_=True, score=1.0)
+    mock_run = mocker.patch('pydantic_ai.Agent.run', return_value=mock_result)
+
+    test_model_settings = ModelSettings(temperature=1)
+
+    result = await judge_input_output_expected(
+        'Hello settings',
+        'Hello world with settings',
+        'Hello',
+        'Output contains input with settings',
+        model_settings=test_model_settings,
+    )
+    assert isinstance(result, GradingOutput)
+    assert result.reason == 'Test passed with settings'
+    assert result.pass_ is True
+    assert result.score == 1.0
+
+    mock_run.assert_called_once()
+    call_args, call_kwargs = mock_run.call_args
+    assert '<Input>\nHello settings\n</Input>' in call_args[0]
+    assert '<ExpectedOutput>\nHello\n</ExpectedOutput>' in call_args[0]
+    assert '<Output>\nHello world with settings\n</Output>' in call_args[0]
+    assert '<Rubric>\nOutput contains input with settings\n</Rubric>' in call_args[0]
+    assert call_kwargs['model_settings'] == test_model_settings
+    # Check if 'model' kwarg is passed, its value will be the default model or None
+    assert 'model' in call_kwargs
+
+
+@pytest.mark.anyio
+async def test_judge_output_expected_mock(mocker: MockerFixture):
+    """Test judge_output_expected function with mocked agent."""
+    # Mock the agent run method
+    mock_result = mocker.MagicMock()
+    mock_result.output = GradingOutput(reason='Test passed', pass_=True, score=1.0)
+    mock_run = mocker.patch('pydantic_ai.Agent.run', return_value=mock_result)
+
+    # Test with string output and expected output
+    result = await judge_output_expected('Hello world', 'Hello', 'Output contains input')
+    assert isinstance(result, GradingOutput)
+    assert result.reason == 'Test passed'
+    assert result.pass_ is True
+    assert result.score == 1.0
+
+    # Verify the agent was called with correct prompt
+    mock_run.assert_called_once()
+    call_args = mock_run.call_args[0]
+    assert '<Input>' not in call_args[0]
+    assert '<ExpectedOutput>\nHello\n</ExpectedOutput>' in call_args[0]
+    assert '<Output>\nHello world\n</Output>' in call_args[0]
+    assert '<Rubric>\nOutput contains input\n</Rubric>' in call_args[0]
+
+
+@pytest.mark.anyio
+async def test_judge_output_expected_with_model_settings_mock(mocker: MockerFixture):
+    """Test judge_output_expected function with model_settings and mocked agent."""
+    mock_result = mocker.MagicMock()
+    mock_result.output = GradingOutput(reason='Test passed with settings', pass_=True, score=1.0)
+    mock_run = mocker.patch('pydantic_ai.Agent.run', return_value=mock_result)
+
+    test_model_settings = ModelSettings(temperature=1)
+
+    result = await judge_output_expected(
+        'Hello world with settings',
+        'Hello',
+        'Output contains input with settings',
+        model_settings=test_model_settings,
+    )
+    assert isinstance(result, GradingOutput)
+    assert result.reason == 'Test passed with settings'
+    assert result.pass_ is True
+    assert result.score == 1.0
+
+    mock_run.assert_called_once()
+    call_args, call_kwargs = mock_run.call_args
+    assert '<Input>' not in call_args[0]
+    assert '<ExpectedOutput>\nHello\n</ExpectedOutput>' in call_args[0]
+    assert '<Output>\nHello world with settings\n</Output>' in call_args[0]
+    assert '<Rubric>\nOutput contains input with settings\n</Rubric>' in call_args[0]
+    assert call_kwargs['model_settings'] == test_model_settings
+    # Check if 'model' kwarg is passed, its value will be the default model or None
+    assert 'model' in call_kwargs
```
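The new `judge_*_expected` helpers can also be called directly; per the assertions above, the expected output is rendered into an `<ExpectedOutput>` section of the judge prompt. A minimal direct-call sketch (argument order follows the tests above; the rubric text is illustrative):

```python
import asyncio

from pydantic_evals.evaluators.llm_as_a_judge import judge_output_expected


async def main() -> None:
    # Positional order per the tests above: output, expected output, rubric.
    grading = await judge_output_expected('Hello world', 'Hello', 'Output contains the expected output')
    print(grading.pass_, grading.score, grading.reason)


asyncio.run(main())
```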
pydantic_ai-0.2.13/tests/models/cassettes/test_openai/test_compatible_api_with_tool_calls_without_id.yaml (new file)

```diff
@@ -0,0 +1,159 @@
+interactions:
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '326'
+      content-type:
+      - application/json
+      host:
+      - generativelanguage.googleapis.com
+    method: POST
+    parsed_body:
+      messages:
+      - content: What is the current time?
+        role: user
+      model: gemini-2.5-pro-preview-05-06
+      stream: false
+      tool_choice: auto
+      tools:
+      - function:
+          description: Get the current time.
+          name: get_current_time
+          parameters:
+            additionalProperties: false
+            properties: {}
+            type: object
+        type: function
+    uri: https://generativelanguage.googleapis.com/v1beta/openai/chat/completions
+  response:
+    headers:
+      alt-svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      content-length:
+      - '1166'
+      content-type:
+      - application/json
+      server-timing:
+      - gfet4t7; dur=1609
+      transfer-encoding:
+      - chunked
+      vary:
+      - Origin
+      - X-Origin
+      - Referer
+    parsed_body:
+      choices:
+      - finish_reason: tool_calls
+        index: 0
+        message:
+          extra_content:
+            google:
+              thought: true
+              thought_signature: AVSoXO4AzAXs7GGvOY63fp8CwJK3yR8HbUPhxhfN2HaPvJnscmZCkaWvckz5NL3nIMK+si/baQcsM2Q8ME9V1RQrb3w1IKceWfjO3kHPL11odY/p6Us4GkkvJqKU/OgUnbAMbuvNdX1pyXWZUrQ7WyXZ5F4mjbxBSCLiVOTdFlK53zn+ajq5JIuG9AYHgwE/sJxUUpvNd6RcWvZR3fQb8gufjCspiO2ZdInRcdGsz/+XftFHxFbXkdtCRAw74AtjlN5osb+KgDYojdohKIEit9DcTBe7hI7oEHWMfnqYSgGrrad4FJpNB3jXmSFevE2iYYKUBzWvxJNj8fIYrCC0g4rJ1aJvuoU=
+          role: assistant
+          thought_signature: AVSoXO4AzAXs7GGvOY63fp8CwJK3yR8HbUPhxhfN2HaPvJnscmZCkaWvckz5NL3nIMK+si/baQcsM2Q8ME9V1RQrb3w1IKceWfjO3kHPL11odY/p6Us4GkkvJqKU/OgUnbAMbuvNdX1pyXWZUrQ7WyXZ5F4mjbxBSCLiVOTdFlK53zn+ajq5JIuG9AYHgwE/sJxUUpvNd6RcWvZR3fQb8gufjCspiO2ZdInRcdGsz/+XftFHxFbXkdtCRAw74AtjlN5osb+KgDYojdohKIEit9DcTBe7hI7oEHWMfnqYSgGrrad4FJpNB3jXmSFevE2iYYKUBzWvxJNj8fIYrCC0g4rJ1aJvuoU=
+          tool_calls:
+          - function:
+              arguments: '{}'
+              name: get_current_time
+            id: ''
+            type: function
+      created: 1748902365
+      id: 3SE-aKjdCcCEz7IPxpqjCA
+      model: gemini-2.5-pro-preview-05-06
+      object: chat.completion
+      usage:
+        completion_tokens: 12
+        prompt_tokens: 35
+        total_tokens: 109
+    status:
+      code: 200
+      message: OK
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '575'
+      content-type:
+      - application/json
+      host:
+      - generativelanguage.googleapis.com
+    method: POST
+    parsed_body:
+      messages:
+      - content: What is the current time?
+        role: user
+      - role: assistant
+        tool_calls:
+        - function:
+            arguments: '{}'
+            name: get_current_time
+          id: pyd_ai_cee885c699414386a7e14b7ec43cadbc
+          type: function
+      - content: Noon
+        role: tool
+        tool_call_id: pyd_ai_cee885c699414386a7e14b7ec43cadbc
+      model: gemini-2.5-pro-preview-05-06
+      stream: false
+      tool_choice: auto
+      tools:
+      - function:
+          description: Get the current time.
+          name: get_current_time
+          parameters:
+            additionalProperties: false
+            properties: {}
+            type: object
+        type: function
+    uri: https://generativelanguage.googleapis.com/v1beta/openai/chat/completions
+  response:
+    headers:
+      alt-svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      content-length:
+      - '755'
+      content-type:
+      - application/json
+      server-timing:
+      - gfet4t7; dur=1097
+      transfer-encoding:
+      - chunked
+      vary:
+      - Origin
+      - X-Origin
+      - Referer
+    parsed_body:
+      choices:
+      - finish_reason: stop
+        index: 0
+        message:
+          content: The current time is Noon.
+          extra_content:
+            google:
+              thought: true
+              thought_signature: AVSoXO4/lu90Bn7IxVcWAjD6KH3ZHMmsCX1tnPJERDI6SZb63hrSEtmJT/v+sn2SzlecMoXBVmtcrd3keFszUgDpLjFm1gB+uMzLS1IqPdEAh+m5S71k1hfStNMFen63UnphYHWt4UrjVHXckysRLVJjCuMmE01hQXcVh9b3YXvfWfZEFA==
+          role: assistant
+          thought_signature: AVSoXO4/lu90Bn7IxVcWAjD6KH3ZHMmsCX1tnPJERDI6SZb63hrSEtmJT/v+sn2SzlecMoXBVmtcrd3keFszUgDpLjFm1gB+uMzLS1IqPdEAh+m5S71k1hfStNMFen63UnphYHWt4UrjVHXckysRLVJjCuMmE01hQXcVh9b3YXvfWfZEFA==
+      created: 1748902366
+      id: 3iE-aNK3EIGJz7IPt_mYoAs
+      model: gemini-2.5-pro-preview-05-06
+      object: chat.completion
+      usage:
+        completion_tokens: 6
+        prompt_tokens: 66
+        total_tokens: 100
+    status:
+      code: 200
+      message: OK
+version: 1
```
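This new cassette records an OpenAI-compatible server (here, Gemini's OpenAI compatibility endpoint) returning a tool call with an empty `id`; note that the follow-up request carries a generated `pyd_ai_…` tool_call_id in its place. A sketch of the kind of setup the cassette corresponds to (the API key is a placeholder):

```python
from pydantic_ai import Agent
from pydantic_ai.models.openai import OpenAIModel
from pydantic_ai.providers.openai import OpenAIProvider

# Drive Gemini through its OpenAI-compatible chat/completions endpoint,
# as the recorded requests above do.
model = OpenAIModel(
    'gemini-2.5-pro-preview-05-06',
    provider=OpenAIProvider(
        base_url='https://generativelanguage.googleapis.com/v1beta/openai/',
        api_key='your-gemini-api-key',  # placeholder
    ),
)
agent = Agent(model)
```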
{pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_bedrock.py

```diff
@@ -31,6 +31,8 @@ from pydantic_ai.messages import (
     UserPromptPart,
     VideoUrl,
 )
+from pydantic_ai.models import ModelRequestParameters
+from pydantic_ai.tools import ToolDefinition
 from pydantic_ai.usage import Usage
 
 from ..conftest import IsDatetime, try_import
@@ -631,3 +633,96 @@ async def test_bedrock_group_consecutive_tool_return_parts(bedrock_provider: BedrockProvider):
             },
         ]
     )
+
+
+async def test_bedrock_mistral_tool_result_format(bedrock_provider: BedrockProvider):
+    now = datetime.datetime.now()
+    req = [
+        ModelRequest(
+            parts=[
+                ToolReturnPart(tool_name='tool1', content={'foo': 'bar'}, tool_call_id='id1', timestamp=now),
+            ]
+        ),
+    ]
+
+    # Models other than Mistral support toolResult.content with text, not json
+    model = BedrockConverseModel('us.amazon.nova-micro-v1:0', provider=bedrock_provider)
+    # Call the mapping function directly
+    _, bedrock_messages = await model._map_messages(req)  # type: ignore[reportPrivateUsage]
+
+    assert bedrock_messages == snapshot(
+        [
+            {
+                'role': 'user',
+                'content': [
+                    {'toolResult': {'toolUseId': 'id1', 'content': [{'text': '{"foo":"bar"}'}], 'status': 'success'}},
+                ],
+            },
+        ]
+    )
+
+    # Mistral requires toolResult.content to hold json, not text
+    model = BedrockConverseModel('mistral.mistral-7b-instruct-v0:2', provider=bedrock_provider)
+    # Call the mapping function directly
+    _, bedrock_messages = await model._map_messages(req)  # type: ignore[reportPrivateUsage]
+
+    assert bedrock_messages == snapshot(
+        [
+            {
+                'role': 'user',
+                'content': [
+                    {'toolResult': {'toolUseId': 'id1', 'content': [{'json': {'foo': 'bar'}}], 'status': 'success'}},
+                ],
+            },
+        ]
+    )
+
+
+async def test_bedrock_anthropic_no_tool_choice(bedrock_provider: BedrockProvider):
+    my_tool = ToolDefinition(
+        'my_tool',
+        'This is my tool',
+        {'type': 'object', 'title': 'Result', 'properties': {'spam': {'type': 'number'}}},
+    )
+    mrp = ModelRequestParameters(function_tools=[my_tool], allow_text_output=False, output_tools=[])
+
+    # Models other than Anthropic support tool_choice
+    model = BedrockConverseModel('us.amazon.nova-micro-v1:0', provider=bedrock_provider)
+    tool_config = model._map_tool_config(mrp)  # type: ignore[reportPrivateUsage]
+
+    assert tool_config == snapshot(
+        {
+            'tools': [
+                {
+                    'toolSpec': {
+                        'name': 'my_tool',
+                        'description': 'This is my tool',
+                        'inputSchema': {
+                            'json': {'type': 'object', 'title': 'Result', 'properties': {'spam': {'type': 'number'}}}
+                        },
+                    }
+                }
+            ],
+            'toolChoice': {'any': {}},
+        }
+    )
+
+    # Anthropic models don't support tool_choice
+    model = BedrockConverseModel('us.anthropic.claude-3-7-sonnet-20250219-v1:0', provider=bedrock_provider)
+    tool_config = model._map_tool_config(mrp)  # type: ignore[reportPrivateUsage]
+
+    assert tool_config == snapshot(
+        {
+            'tools': [
+                {
+                    'toolSpec': {
+                        'name': 'my_tool',
+                        'description': 'This is my tool',
+                        'inputSchema': {
+                            'json': {'type': 'object', 'title': 'Result', 'properties': {'spam': {'type': 'number'}}}
+                        },
+                    }
+                }
+            ]
+        }
+    )
```