PyPI - pydantic-ai - Versions diffs - 0.0.48__tar.gz → 0.0.50__tar.gz - Mend

pydantic-ai 0.0.48__tar.gz → 0.0.50__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pydantic-ai might be problematic. Click here for more details.

Files changed (145) hide show

{pydantic_ai-0.0.48 → pydantic_ai-0.0.50}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pydantic-ai
-Version: 0.0.48
+Version: 0.0.50
 Summary: Agent Framework / shim to use Pydantic with LLMs
 Project-URL: Homepage, https://ai.pydantic.dev
 Project-URL: Source, https://github.com/pydantic/pydantic-ai
@@ -28,9 +28,9 @@ Classifier: Topic :: Internet
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.9
-Requires-Dist: pydantic-ai-slim[anthropic,bedrock,cli,cohere,evals,groq,mcp,mistral,openai,vertexai]==0.0.48
+Requires-Dist: pydantic-ai-slim[anthropic,bedrock,cli,cohere,evals,groq,mcp,mistral,openai,vertexai]==0.0.50
 Provides-Extra: examples
-Requires-Dist: pydantic-ai-examples==0.0.48; extra == 'examples'
+Requires-Dist: pydantic-ai-examples==0.0.50; extra == 'examples'
 Provides-Extra: logfire
 Requires-Dist: logfire>=3.11.0; extra == 'logfire'
 Description-Content-Type: text/markdown

{pydantic_ai-0.0.48 → pydantic_ai-0.0.50}/tests/evals/test_dataset.py RENAMED Viewed

@@ -13,6 +13,7 @@ from inline_snapshot import snapshot
 from pydantic import BaseModel
 from ..conftest import try_import
+from .utils import render_table
 with try_import() as imports_successful:
     from pydantic_evals import Case, Dataset
@@ -164,16 +165,12 @@ async def test_add_evaluator(
         'cases': [
             {
                 'evaluators': [{'Python': 'ctx.output == 2'}],
-                'expected_output': None,
                 'inputs': {'query': 'What is 1+1?'},
-                'metadata': None,
                 'name': 'My Case 1',
             },
             {
                 'evaluators': [{'Python': 'ctx.output == 4'}],
-                'expected_output': None,
                 'inputs': {'query': 'What is 2+2?'},
-                'metadata': None,
                 'name': 'My Case 2',
             },
         ],
@@ -346,40 +343,42 @@ async def test_increment_eval_metric(example_dataset: Dataset[TaskInput, TaskOut
         return TaskOutput(answer=f'answer to {inputs.query}')
     report = await example_dataset.evaluate(my_task)
-    assert report.cases == [
-        ReportCase(
-            name='case1',
-            inputs={'query': 'What is 2+2?'},
-            metadata=TaskMetadata(difficulty='easy', category='general'),
-            expected_output=TaskOutput(answer='4', confidence=1.0),
-            output=TaskOutput(answer='answer to What is 2+2?', confidence=1.0),
-            metrics={'chars': 12},
-            attributes={'is_about_france': False},
-            scores={},
-            labels={},
-            assertions={},
-            task_duration=1.0,
-            total_duration=3.0,
-            trace_id='00000000000000000000000000000001',
-            span_id='0000000000000003',
-        ),
-        ReportCase(
-            name='case2',
-            inputs={'query': 'What is the capital of France?'},
-            metadata=TaskMetadata(difficulty='medium', category='geography'),
-            expected_output=TaskOutput(answer='Paris', confidence=1.0),
-            output=TaskOutput(answer='answer to What is the capital of France?', confidence=1.0),
-            metrics={'chars': 30},
-            attributes={'is_about_france': True},
-            scores={},
-            labels={},
-            assertions={},
-            task_duration=1.0,
-            total_duration=3.0,
-            trace_id='00000000000000000000000000000001',
-            span_id='0000000000000007',
-        ),
-    ]
+    assert report.cases == snapshot(
+        [
+            ReportCase(
+                name='case1',
+                inputs=TaskInput(query='What is 2+2?'),
+                metadata=TaskMetadata(difficulty='easy', category='general'),
+                expected_output=TaskOutput(answer='4', confidence=1.0),
+                output=TaskOutput(answer='answer to What is 2+2?', confidence=1.0),
+                metrics={'chars': 12},
+                attributes={'is_about_france': False},
+                scores={},
+                labels={},
+                assertions={},
+                task_duration=1.0,
+                total_duration=3.0,
+                trace_id='00000000000000000000000000000001',
+                span_id='0000000000000003',
+            ),
+            ReportCase(
+                name='case2',
+                inputs=TaskInput(query='What is the capital of France?'),
+                metadata=TaskMetadata(difficulty='medium', category='geography'),
+                expected_output=TaskOutput(answer='Paris', confidence=1.0),
+                output=TaskOutput(answer='answer to What is the capital of France?', confidence=1.0),
+                metrics={'chars': 30},
+                attributes={'is_about_france': True},
+                scores={},
+                labels={},
+                assertions={},
+                task_duration=1.0,
+                total_duration=3.0,
+                trace_id='00000000000000000000000000000001',
+                span_id='0000000000000007',
+            ),
+        ]
+    )
 async def test_repeated_name_outputs(example_dataset: Dataset[TaskInput, TaskOutput, TaskMetadata]):
@@ -397,7 +396,7 @@ async def test_repeated_name_outputs(example_dataset: Dataset[TaskInput, TaskOut
         [
             ReportCase(
                 name='case1',
-                inputs={'query': 'What is 2+2?'},
+                inputs=TaskInput(query='What is 2+2?'),
                 metadata=TaskMetadata(difficulty='easy', category='general'),
                 expected_output=TaskOutput(answer='4', confidence=1.0),
                 output=TaskOutput(answer='answer to What is 2+2?', confidence=1.0),
@@ -423,7 +422,7 @@ async def test_repeated_name_outputs(example_dataset: Dataset[TaskInput, TaskOut
             ),
             ReportCase(
                 name='case2',
-                inputs={'query': 'What is the capital of France?'},
+                inputs=TaskInput(query='What is the capital of France?'),
                 metadata=TaskMetadata(difficulty='medium', category='geography'),
                 expected_output=TaskOutput(answer='Paris', confidence=1.0),
                 output=TaskOutput(answer='answer to What is the capital of France?', confidence=1.0),
@@ -471,7 +470,7 @@ async def test_genai_attribute_collection(example_dataset: Dataset[TaskInput, Ta
         [
             ReportCase(
                 name='case1',
-                inputs={'query': 'What is 2+2?'},
+                inputs=TaskInput(query='What is 2+2?'),
                 metadata=TaskMetadata(difficulty='easy', category='general'),
                 expected_output=TaskOutput(answer='4', confidence=1.0),
                 output=TaskOutput(answer='answer to What is 2+2?', confidence=1.0),
@@ -487,7 +486,7 @@ async def test_genai_attribute_collection(example_dataset: Dataset[TaskInput, Ta
             ),
             ReportCase(
                 name='case2',
-                inputs={'query': 'What is the capital of France?'},
+                inputs=TaskInput(query='What is the capital of France?'),
                 metadata=TaskMetadata(difficulty='medium', category='geography'),
                 expected_output=TaskOutput(answer='Paris', confidence=1.0),
                 output=TaskOutput(answer='answer to What is the capital of France?', confidence=1.0),
@@ -992,3 +991,47 @@ def test_import_generate_dataset():
     from pydantic_evals.generation import generate_dataset
     assert generate_dataset
+def test_evaluate_non_serializable_inputs():
+    @dataclass
+    class MyInputs:
+        result_type: type[str] | type[int]
+    my_dataset = Dataset[MyInputs, Any, Any](
+        cases=[
+            Case(
+                name='str',
+                inputs=MyInputs(result_type=str),
+                expected_output='abc',
+            ),
+            Case(
+                name='int',
+                inputs=MyInputs(result_type=int),
+                expected_output=123,
+            ),
+        ],
+    )
+    async def my_task(my_inputs: MyInputs) -> int | str:
+        if issubclass(my_inputs.result_type, str):
+            return my_inputs.result_type('abc')
+        else:
+            return my_inputs.result_type(123)
+    report = my_dataset.evaluate_sync(task=my_task)
+    assert [c.inputs for c in report.cases] == snapshot([MyInputs(result_type=str), MyInputs(result_type=int)])
+    table = report.console_table(include_input=True)
+    assert render_table(table) == snapshot("""\
+                                        Evaluation Summary: my_task
+┏━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┓
+┃ Case ID  ┃ Inputs                                                                             ┃ Duration ┃
+┡━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━┩
+│ str      │ test_evaluate_non_serializable_inputs.<locals>.MyInputs(result_type=<class 'str'>) │     1.0s │
+├──────────┼────────────────────────────────────────────────────────────────────────────────────┼──────────┤
+│ int      │ test_evaluate_non_serializable_inputs.<locals>.MyInputs(result_type=<class 'int'>) │     1.0s │
+├──────────┼────────────────────────────────────────────────────────────────────────────────────┼──────────┤
+│ Averages │                                                                                    │     1.0s │
+└──────────┴────────────────────────────────────────────────────────────────────────────────────┴──────────┘
+""")

{pydantic_ai-0.0.48 → pydantic_ai-0.0.50}/tests/evals/test_otel.py RENAMED Viewed

@@ -245,26 +245,26 @@ async def test_span_tree_repr(span_tree: SpanTree):
 """)
     assert span_tree.repr_xml(include_span_id=True) == snapshot("""\
 <SpanTree>
-  <SpanNode name='root' span_id=0000000000000001 >
-    <SpanNode name='child1' span_id=0000000000000003 >
-      <SpanNode name='grandchild1' span_id=0000000000000005 />
-      <SpanNode name='grandchild2' span_id=0000000000000007 />
+  <SpanNode name='root' span_id='0000000000000001' >
+    <SpanNode name='child1' span_id='0000000000000003' >
+      <SpanNode name='grandchild1' span_id='0000000000000005' />
+      <SpanNode name='grandchild2' span_id='0000000000000007' />
     </SpanNode>
-    <SpanNode name='child2' span_id=0000000000000009 >
-      <SpanNode name='grandchild3' span_id=000000000000000b />
+    <SpanNode name='child2' span_id='0000000000000009' >
+      <SpanNode name='grandchild3' span_id='000000000000000b' />
     </SpanNode>
   </SpanNode>
 </SpanTree>\
 """)
     assert span_tree.repr_xml(include_trace_id=True) == snapshot("""\
 <SpanTree>
-  <SpanNode name='root' trace_id=00000000000000000000000000000001 >
-    <SpanNode name='child1' trace_id=00000000000000000000000000000001 >
-      <SpanNode name='grandchild1' trace_id=00000000000000000000000000000001 />
-      <SpanNode name='grandchild2' trace_id=00000000000000000000000000000001 />
+  <SpanNode name='root' trace_id='00000000000000000000000000000001' >
+    <SpanNode name='child1' trace_id='00000000000000000000000000000001' >
+      <SpanNode name='grandchild1' trace_id='00000000000000000000000000000001' />
+      <SpanNode name='grandchild2' trace_id='00000000000000000000000000000001' />
     </SpanNode>
-    <SpanNode name='child2' trace_id=00000000000000000000000000000001 >
-      <SpanNode name='grandchild3' trace_id=00000000000000000000000000000001 />
+    <SpanNode name='child2' trace_id='00000000000000000000000000000001' >
+      <SpanNode name='grandchild3' trace_id='00000000000000000000000000000001' />
     </SpanNode>
   </SpanNode>
 </SpanTree>\
@@ -302,9 +302,9 @@ async def test_span_node_repr(span_tree: SpanTree):
     assert node is not None
     leaf_node = span_tree.first({'name_equals': 'grandchild1'})
-    assert str(leaf_node) == snapshot("<SpanNode name='grandchild1' span_id=0000000000000005 />")
+    assert str(leaf_node) == snapshot("<SpanNode name='grandchild1' span_id='0000000000000005' />")
-    assert str(node) == snapshot("<SpanNode name='child2' span_id=0000000000000009>...</SpanNode>")
+    assert str(node) == snapshot("<SpanNode name='child2' span_id='0000000000000009'>...</SpanNode>")
     assert repr(node) == snapshot("""\
 <SpanNode name='child2' >
   <SpanNode name='grandchild3' />
@@ -312,13 +312,13 @@ async def test_span_node_repr(span_tree: SpanTree):
 """)
     assert node.repr_xml(include_children=False) == snapshot("<SpanNode name='child2' children=... />")
     assert node.repr_xml(include_span_id=True) == snapshot("""\
-<SpanNode name='child2' span_id=0000000000000009 >
-  <SpanNode name='grandchild3' span_id=000000000000000b />
+<SpanNode name='child2' span_id='0000000000000009' >
+  <SpanNode name='grandchild3' span_id='000000000000000b' />
 </SpanNode>\
 """)
     assert node.repr_xml(include_trace_id=True) == snapshot("""\
-<SpanNode name='child2' trace_id=00000000000000000000000000000001 >
-  <SpanNode name='grandchild3' trace_id=00000000000000000000000000000001 />
+<SpanNode name='child2' trace_id='00000000000000000000000000000001' >
+  <SpanNode name='grandchild3' trace_id='00000000000000000000000000000001' />
 </SpanNode>\
 """)
     assert node.repr_xml(include_start_timestamp=True) == snapshot("""\
@@ -383,6 +383,17 @@ async def test_span_tree_ancestors_methods():
     assert not leaf_node.matches({'no_ancestor_has': {'name_matches_regex': 'root'}})
     assert leaf_node.matches({'no_ancestor_has': {'name_matches_regex': 'abc'}})
+    # Test stop_recursing_when:
+    assert not leaf_node.matches(
+        {'some_ancestor_has': {'name_equals': 'level1'}, 'stop_recursing_when': {'name_equals': 'level2'}}
+    )
+    assert leaf_node.matches(
+        {'all_ancestors_have': {'name_matches_regex': 'level'}, 'stop_recursing_when': {'name_equals': 'level1'}}
+    )
+    assert leaf_node.matches(
+        {'no_ancestor_has': {'name_matches_regex': 'root'}, 'stop_recursing_when': {'name_equals': 'level1'}}
+    )
 async def test_span_tree_descendants_methods():
     """Test the descendant traversal methods in SpanNode."""
@@ -462,6 +473,17 @@ async def test_span_tree_descendants_methods():
     assert leaf_node.matches(negated_descendant_query)
     assert leaf_node.matches({'no_descendant_has': {'has_attributes': {'depth': 4}}})
+    # Test stop_recursing_when:
+    assert not root_node.matches(
+        {'some_descendant_has': {'name_equals': 'leaf'}, 'stop_recursing_when': {'name_equals': 'level2'}}
+    )
+    assert root_node.matches(
+        {'all_descendants_have': {'has_attribute_keys': ['depth']}, 'stop_recursing_when': {'name_equals': 'level2'}}
+    )
+    assert root_node.matches(
+        {'no_descendant_has': {'name_equals': 'leaf'}, 'stop_recursing_when': {'name_equals': 'level3'}}
+    )
 async def test_log_levels_and_exceptions():
     """Test recording different log levels and exceptions in spans."""

pydantic-ai 0.0.48__tar.gz → 0.0.50__tar.gz

Potentially problematic release.

pydantic-ai 0.0.48__tar.gz → 0.0.50__tar.gz