PyPI - pydantic-ai - Versions diffs - 0.4.4__tar.gz → 0.4.6__tar.gz - Mend

pydantic-ai 0.4.4__tar.gz → 0.4.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (337) hide show

{pydantic_ai-0.4.4 → pydantic_ai-0.4.6}/.gitignore RENAMED Viewed

@@ -15,7 +15,6 @@ examples/pydantic_ai_examples/.chat_app_messages.sqlite
 .vscode/
 /question_graph_history.json
 /docs-site/.wrangler/
-/CLAUDE.md
 node_modules/
 **.idea/
 .coverage*

{pydantic_ai-0.4.4 → pydantic_ai-0.4.6}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pydantic-ai
-Version: 0.4.4
+Version: 0.4.6
 Summary: Agent Framework / shim to use Pydantic with LLMs
 Project-URL: Homepage, https://ai.pydantic.dev
 Project-URL: Source, https://github.com/pydantic/pydantic-ai
@@ -28,11 +28,11 @@ Classifier: Topic :: Internet
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.9
-Requires-Dist: pydantic-ai-slim[ag-ui,anthropic,bedrock,cli,cohere,evals,google,groq,huggingface,mcp,mistral,openai,vertexai]==0.4.4
+Requires-Dist: pydantic-ai-slim[ag-ui,anthropic,bedrock,cli,cohere,evals,google,groq,huggingface,mcp,mistral,openai,vertexai]==0.4.6
 Provides-Extra: a2a
 Requires-Dist: fasta2a>=0.4.1; extra == 'a2a'
 Provides-Extra: examples
-Requires-Dist: pydantic-ai-examples==0.4.4; extra == 'examples'
+Requires-Dist: pydantic-ai-examples==0.4.6; extra == 'examples'
 Provides-Extra: logfire
 Requires-Dist: logfire>=3.11.0; extra == 'logfire'
 Description-Content-Type: text/markdown

{pydantic_ai-0.4.4 → pydantic_ai-0.4.6}/tests/evals/test_dataset.py RENAMED Viewed

@@ -11,7 +11,7 @@ from dirty_equals import HasRepr, IsNumber
 from inline_snapshot import snapshot
 from pydantic import BaseModel
-from ..conftest import try_import
+from ..conftest import IsStr, try_import
 from .utils import render_table
 with try_import() as imports_successful:
@@ -1086,3 +1086,134 @@ def test_evaluate_non_serializable_inputs():
 │ Averages │                                                                                    │     1.0s │
 └──────────┴────────────────────────────────────────────────────────────────────────────────────┴──────────┘
 """)
+async def test_evaluate_async_logfire(
+    example_dataset: Dataset[TaskInput, TaskOutput, TaskMetadata],
+    simple_evaluator: type[Evaluator[TaskInput, TaskOutput, TaskMetadata]],
+    capfire: CaptureLogfire,
+):
+    """Test evaluating a dataset."""
+    example_dataset.add_evaluator(simple_evaluator())
+    async def mock_async_task(inputs: TaskInput) -> TaskOutput:
+        if inputs.query == 'What is 2+2?':
+            return TaskOutput(answer='4')
+        elif inputs.query == 'What is the capital of France?':
+            return TaskOutput(answer='Paris')
+        return TaskOutput(answer='Unknown')  # pragma: no cover
+    await example_dataset.evaluate(mock_async_task)
+    spans = capfire.exporter.exported_spans_as_dict()
+    spans.sort(key=lambda s: s['start_time'])
+    assert spans == [
+        {
+            'attributes': {
+                'averages': '{"name":"Averages","scores":{"confidence":1.0},"labels":{},"metrics":{},"assertions":1.0,"task_duration":1.0,"total_duration":5.0}',
+                'cases': '[{"name":"case1","inputs":{"query":"What is '
+                '2+2?"},"metadata":{"difficulty":"easy","category":"general"},"expected_output":{"answer":"4","confidence":1.0},"output":{"answer":"4","confidence":1.0},"metrics":{},"attributes":{},"scores":{"confidence":{"name":"confidence","value":1.0,"reason":null,"source":{"name":"SimpleEvaluator","arguments":null}}},"labels":{},"assertions":{"correct":{"name":"correct","value":true,"reason":null,"source":{"name":"SimpleEvaluator","arguments":null}}},"task_duration":1.0,"total_duration":6.0,"trace_id":"00000000000000000000000000000001","span_id":"0000000000000003"},{"name":"case2","inputs":{"query":"What '
+                'is the capital of '
+                'France?"},"metadata":{"difficulty":"medium","category":"geography"},"expected_output":{"answer":"Paris","confidence":1.0},"output":{"answer":"Paris","confidence":1.0},"metrics":{},"attributes":{},"scores":{"confidence":{"name":"confidence","value":1.0,"reason":null,"source":{"name":"SimpleEvaluator","arguments":null}}},"labels":{},"assertions":{"correct":{"name":"correct","value":true,"reason":null,"source":{"name":"SimpleEvaluator","arguments":null}}},"task_duration":1.0,"total_duration":4.0,"trace_id":"00000000000000000000000000000001","span_id":"0000000000000007"}]',
+                'code.filepath': 'test_dataset.py',
+                'code.function': 'test_evaluate_async_logfire',
+                'code.lineno': 123,
+                'logfire.json_schema': '{"type":"object","properties":{"name":{},"cases":{"type":"array"},"averages":{"type":"object"}}}',
+                'logfire.msg': 'evaluate mock_async_task',
+                'logfire.msg_template': 'evaluate {name}',
+                'logfire.span_type': 'span',
+                'name': 'mock_async_task',
+            },
+            'context': {'is_remote': False, 'span_id': 1, 'trace_id': 1},
+            'end_time': 10000000000,
+            'name': 'evaluate {name}',
+            'parent': None,
+            'start_time': 1000000000,
+        },
+        {
+            'attributes': {
+                'assertions': '{"correct":{"name":"correct","value":true,"reason":null,"source":{"name":"SimpleEvaluator","arguments":null}}}',
+                'attributes': '{}',
+                'case_name': 'case1',
+                'code.filepath': IsStr(),
+                'code.lineno': 123,
+                'expected_output': '{"answer":"4","confidence":1.0}',
+                'inputs': '{"query":"What is 2+2?"}',
+                'labels': '{}',
+                'logfire.json_schema': '{"type":"object","properties":{"task_name":{},"case_name":{},"inputs":{"type":"object","title":"TaskInput","x-python-datatype":"PydanticModel"},"metadata":{"type":"object","title":"TaskMetadata","x-python-datatype":"PydanticModel"},"expected_output":{"type":"object","title":"TaskOutput","x-python-datatype":"PydanticModel"},"output":{"type":"object","title":"TaskOutput","x-python-datatype":"PydanticModel"},"task_duration":{},"metrics":{"type":"object"},"attributes":{"type":"object"},"assertions":{"type":"object"},"scores":{"type":"object"},"labels":{"type":"object"}}}',
+                'logfire.msg': 'case: case1',
+                'logfire.msg_template': 'case: {case_name}',
+                'logfire.span_type': 'span',
+                'metadata': '{"difficulty":"easy","category":"general"}',
+                'metrics': '{}',
+                'output': '{"answer":"4","confidence":1.0}',
+                'scores': '{"confidence":{"name":"confidence","value":1.0,"reason":null,"source":{"name":"SimpleEvaluator","arguments":null}}}',
+                'task_duration': 1.0,
+                'task_name': 'mock_async_task',
+            },
+            'context': {'is_remote': False, 'span_id': 3, 'trace_id': 1},
+            'end_time': 8000000000,
+            'name': 'case: {case_name}',
+            'parent': {'is_remote': False, 'span_id': 1, 'trace_id': 1},
+            'start_time': 2000000000,
+        },
+        {
+            'attributes': {
+                'code.filepath': IsStr(),
+                'code.lineno': 123,
+                'logfire.json_schema': '{"type":"object","properties":{"task":{}}}',
+                'logfire.msg': 'execute mock_async_task',
+                'logfire.msg_template': 'execute {task}',
+                'logfire.span_type': 'span',
+                'task': 'mock_async_task',
+            },
+            'context': {'is_remote': False, 'span_id': 5, 'trace_id': 1},
+            'end_time': 4000000000,
+            'name': 'execute {task}',
+            'parent': {'is_remote': False, 'span_id': 3, 'trace_id': 1},
+            'start_time': 3000000000,
+        },
+        {
+            'attributes': {
+                'assertions': '{"correct":{"name":"correct","value":true,"reason":null,"source":{"name":"SimpleEvaluator","arguments":null}}}',
+                'attributes': '{}',
+                'case_name': 'case2',
+                'code.filepath': IsStr(),
+                'code.lineno': 123,
+                'expected_output': '{"answer":"Paris","confidence":1.0}',
+                'inputs': '{"query":"What is the capital of France?"}',
+                'labels': '{}',
+                'logfire.json_schema': '{"type":"object","properties":{"task_name":{},"case_name":{},"inputs":{"type":"object","title":"TaskInput","x-python-datatype":"PydanticModel"},"metadata":{"type":"object","title":"TaskMetadata","x-python-datatype":"PydanticModel"},"expected_output":{"type":"object","title":"TaskOutput","x-python-datatype":"PydanticModel"},"output":{"type":"object","title":"TaskOutput","x-python-datatype":"PydanticModel"},"task_duration":{},"metrics":{"type":"object"},"attributes":{"type":"object"},"assertions":{"type":"object"},"scores":{"type":"object"},"labels":{"type":"object"}}}',
+                'logfire.msg': 'case: case2',
+                'logfire.msg_template': 'case: {case_name}',
+                'logfire.span_type': 'span',
+                'metadata': '{"difficulty":"medium","category":"geography"}',
+                'metrics': '{}',
+                'output': '{"answer":"Paris","confidence":1.0}',
+                'scores': '{"confidence":{"name":"confidence","value":1.0,"reason":null,"source":{"name":"SimpleEvaluator","arguments":null}}}',
+                'task_duration': 1.0,
+                'task_name': 'mock_async_task',
+            },
+            'context': {'is_remote': False, 'span_id': 7, 'trace_id': 1},
+            'end_time': 9000000000,
+            'name': 'case: {case_name}',
+            'parent': {'is_remote': False, 'span_id': 1, 'trace_id': 1},
+            'start_time': 5000000000,
+        },
+        {
+            'attributes': {
+                'code.filepath': IsStr(),
+                'code.lineno': 123,
+                'logfire.json_schema': '{"type":"object","properties":{"task":{}}}',
+                'logfire.msg': 'execute mock_async_task',
+                'logfire.msg_template': 'execute {task}',
+                'logfire.span_type': 'span',
+                'task': 'mock_async_task',
+            },
+            'context': {'is_remote': False, 'span_id': 9, 'trace_id': 1},
+            'end_time': 7000000000,
+            'name': 'execute {task}',
+            'parent': {'is_remote': False, 'span_id': 7, 'trace_id': 1},
+            'start_time': 6000000000,
+        },
+    ]

{pydantic_ai-0.4.4 → pydantic_ai-0.4.6}/tests/evals/test_llm_as_a_judge.py RENAMED Viewed

@@ -1,9 +1,10 @@
 from __future__ import annotations as _annotations
 import pytest
+from inline_snapshot import snapshot
 from pytest_mock import MockerFixture
-from ..conftest import try_import
+from ..conftest import BinaryContent, try_import
 with try_import() as imports_successful:
     from pydantic_ai.settings import ModelSettings
@@ -141,6 +142,54 @@ async def test_judge_input_output_mock(mocker: MockerFixture):
     assert '<Rubric>\nOutput contains input\n</Rubric>' in call_args[0]
+async def test_judge_input_output_binary_content_list_mock(mocker: MockerFixture, image_content: BinaryContent):
+    """Test judge_input_output function with mocked agent."""
+    # Mock the agent run method
+    mock_result = mocker.MagicMock()
+    mock_result.output = GradingOutput(reason='Test passed', pass_=True, score=1.0)
+    mock_run = mocker.patch('pydantic_ai.Agent.run', return_value=mock_result)
+    result = await judge_input_output([image_content, image_content], 'Hello world', 'Output contains input')
+    assert isinstance(result, GradingOutput)
+    assert result.reason == 'Test passed'
+    assert result.pass_ is True
+    assert result.score == 1.0
+    # Verify the agent was called with correct prompt
+    mock_run.assert_called_once()
+    raw_prompt = mock_run.call_args[0][0]
+    # 1) It must be a list
+    assert isinstance(raw_prompt, list), 'Expected prompt to be a list when passing binary'
+    # 2) The BinaryContent you passed in should be one of the elements
+    assert image_content in raw_prompt, 'Expected the exact BinaryContent instance to be in the prompt list'
+async def test_judge_input_output_binary_content_mock(mocker: MockerFixture, image_content: BinaryContent):
+    """Test judge_input_output function with mocked agent."""
+    # Mock the agent run method
+    mock_result = mocker.MagicMock()
+    mock_result.output = GradingOutput(reason='Test passed', pass_=True, score=1.0)
+    mock_run = mocker.patch('pydantic_ai.Agent.run', return_value=mock_result)
+    result = await judge_input_output(image_content, 'Hello world', 'Output contains input')
+    assert isinstance(result, GradingOutput)
+    assert result.reason == 'Test passed'
+    assert result.pass_ is True
+    assert result.score == 1.0
+    # Verify the agent was called with correct prompt
+    mock_run.assert_called_once()
+    raw_prompt = mock_run.call_args[0][0]
+    # 1) It must be a list
+    assert isinstance(raw_prompt, list), 'Expected prompt to be a list when passing binary'
+    # 2) The BinaryContent you passed in should be one of the elements
+    assert image_content in raw_prompt, 'Expected the exact BinaryContent instance to be in the prompt list'
 @pytest.mark.anyio
 async def test_judge_input_output_with_model_settings_mock(mocker: MockerFixture):
     """Test judge_input_output function with model_settings and mocked agent."""
@@ -172,7 +221,7 @@ async def test_judge_input_output_with_model_settings_mock(mocker: MockerFixture
 @pytest.mark.anyio
-async def test_judge_input_output_expected_mock(mocker: MockerFixture):
+async def test_judge_input_output_expected_mock(mocker: MockerFixture, image_content: BinaryContent):
     """Test judge_input_output_expected function with mocked agent."""
     # Mock the agent run method
     mock_result = mocker.MagicMock()
@@ -187,16 +236,29 @@ async def test_judge_input_output_expected_mock(mocker: MockerFixture):
     assert result.score == 1.0
     # Verify the agent was called with correct prompt
-    mock_run.assert_called_once()
     call_args = mock_run.call_args[0]
     assert '<Input>\nHello\n</Input>' in call_args[0]
     assert '<ExpectedOutput>\nHello\n</ExpectedOutput>' in call_args[0]
     assert '<Output>\nHello world\n</Output>' in call_args[0]
     assert '<Rubric>\nOutput contains input\n</Rubric>' in call_args[0]
+    result = await judge_input_output_expected(image_content, 'Hello world', 'Hello', 'Output contains input')
+    assert isinstance(result, GradingOutput)
+    assert result.reason == 'Test passed'
+    assert result.pass_ is True
+    assert result.score == 1.0
+    call_args = mock_run.call_args[0]
+    assert image_content in call_args[0]
+    assert '<ExpectedOutput>\nHello\n</ExpectedOutput>' in call_args[0]
+    assert '<Output>\nHello world\n</Output>' in call_args[0]
+    assert '<Rubric>\nOutput contains input\n</Rubric>' in call_args[0]
 @pytest.mark.anyio
-async def test_judge_input_output_expected_with_model_settings_mock(mocker: MockerFixture):
+async def test_judge_input_output_expected_with_model_settings_mock(
+    mocker: MockerFixture, image_content: BinaryContent
+):
     """Test judge_input_output_expected function with model_settings and mocked agent."""
     mock_result = mocker.MagicMock()
     mock_result.output = GradingOutput(reason='Test passed with settings', pass_=True, score=1.0)
@@ -216,7 +278,6 @@ async def test_judge_input_output_expected_with_model_settings_mock(mocker: Mock
     assert result.pass_ is True
     assert result.score == 1.0
-    mock_run.assert_called_once()
     call_args, call_kwargs = mock_run.call_args
     assert '<Input>\nHello settings\n</Input>' in call_args[0]
     assert '<ExpectedOutput>\nHello\n</ExpectedOutput>' in call_args[0]
@@ -226,6 +287,108 @@ async def test_judge_input_output_expected_with_model_settings_mock(mocker: Mock
     # Check if 'model' kwarg is passed, its value will be the default model or None
     assert 'model' in call_kwargs
+    result = await judge_input_output_expected(
+        image_content,
+        'Hello world with settings',
+        'Hello',
+        'Output contains input with settings',
+        model_settings=test_model_settings,
+    )
+    assert isinstance(result, GradingOutput)
+    assert result.reason == 'Test passed with settings'
+    assert result.pass_ is True
+    assert result.score == 1.0
+    call_args, call_kwargs = mock_run.call_args
+    assert image_content in call_args[0]
+    assert '<ExpectedOutput>\nHello\n</ExpectedOutput>' in call_args[0]
+    assert '<Output>\nHello world with settings\n</Output>' in call_args[0]
+    assert '<Rubric>\nOutput contains input with settings\n</Rubric>' in call_args[0]
+    assert call_kwargs['model_settings'] == test_model_settings
+    # Check if 'model' kwarg is passed, its value will be the default model or None
+    assert 'model' in call_kwargs
+    result = await judge_input_output_expected(
+        123,
+        'Hello world with settings',
+        'Hello',
+        'Output contains input with settings',
+        model_settings=test_model_settings,
+    )
+    assert isinstance(result, GradingOutput)
+    assert result.reason == 'Test passed with settings'
+    assert result.pass_ is True
+    assert result.score == 1.0
+    call_args, call_kwargs = mock_run.call_args
+    assert call_args == snapshot(
+        (
+            [
+                '<Input>\n',
+                '123',
+                '</Input>',
+                """\
+<Output>
+Hello world with settings
+</Output>\
+""",
+                """\
+<Rubric>
+Output contains input with settings
+</Rubric>\
+""",
+                """\
+<ExpectedOutput>
+Hello
+</ExpectedOutput>\
+""",
+            ],
+        )
+    )
+    result = await judge_input_output_expected(
+        [123],
+        'Hello world with settings',
+        'Hello',
+        'Output contains input with settings',
+        model_settings=test_model_settings,
+    )
+    assert isinstance(result, GradingOutput)
+    assert result.reason == 'Test passed with settings'
+    assert result.pass_ is True
+    assert result.score == 1.0
+    call_args, call_kwargs = mock_run.call_args
+    assert call_args == snapshot(
+        (
+            [
+                '<Input>\n',
+                '123',
+                '</Input>',
+                """\
+<Output>
+Hello world with settings
+</Output>\
+""",
+                """\
+<Rubric>
+Output contains input with settings
+</Rubric>\
+""",
+                """\
+<ExpectedOutput>
+Hello
+</ExpectedOutput>\
+""",
+            ],
+        )
+    )
 @pytest.mark.anyio
 async def test_judge_output_expected_mock(mocker: MockerFixture):
@@ -243,7 +406,6 @@ async def test_judge_output_expected_mock(mocker: MockerFixture):
     assert result.score == 1.0
     # Verify the agent was called with correct prompt
-    mock_run.assert_called_once()
     call_args = mock_run.call_args[0]
     assert '<Input>' not in call_args[0]
     assert '<ExpectedOutput>\nHello\n</ExpectedOutput>' in call_args[0]
@@ -252,7 +414,7 @@ async def test_judge_output_expected_mock(mocker: MockerFixture):
 @pytest.mark.anyio
-async def test_judge_output_expected_with_model_settings_mock(mocker: MockerFixture):
+async def test_judge_output_expected_with_model_settings_mock(mocker: MockerFixture, image_content: BinaryContent):
     """Test judge_output_expected function with model_settings and mocked agent."""
     mock_result = mocker.MagicMock()
     mock_result.output = GradingOutput(reason='Test passed with settings', pass_=True, score=1.0)
@@ -280,3 +442,23 @@ async def test_judge_output_expected_with_model_settings_mock(mocker: MockerFixt
     assert call_kwargs['model_settings'] == test_model_settings
     # Check if 'model' kwarg is passed, its value will be the default model or None
     assert 'model' in call_kwargs
+    result = await judge_output_expected(
+        image_content,
+        'Hello',
+        'Output contains input with settings',
+        model_settings=test_model_settings,
+    )
+    assert isinstance(result, GradingOutput)
+    assert result.reason == 'Test passed with settings'
+    assert result.pass_ is True
+    assert result.score == 1.0
+    call_args, call_kwargs = mock_run.call_args
+    assert '<Input>' not in call_args[0]
+    assert '<ExpectedOutput>\nHello\n</ExpectedOutput>' in call_args[0]
+    assert '<Output>' in call_args[0]
+    assert '<Rubric>\nOutput contains input with settings\n</Rubric>' in call_args[0]
+    assert call_kwargs['model_settings'] == test_model_settings
+    # Check if 'model' kwarg is passed, its value will be the default model or None
+    assert 'model' in call_kwargs

pydantic-ai 0.4.4__tar.gz → 0.4.6__tar.gz

pydantic-ai 0.4.4__tar.gz → 0.4.6__tar.gz