pydantic-ai 0.4.11__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (346) hide show
  1. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/PKG-INFO +3 -3
  2. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_dataset.py +76 -9
  3. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_evaluator_base.py +2 -2
  4. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_evaluator_spec.py +1 -1
  5. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_evaluators.py +18 -4
  6. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_reporting.py +61 -20
  7. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_reports.py +2 -2
  8. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_openai.py +231 -24
  9. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_agent.py +52 -3
  10. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_messages.py +23 -5
  11. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_tools.py +6 -6
  12. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/.gitignore +0 -0
  13. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/LICENSE +0 -0
  14. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/Makefile +0 -0
  15. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/README.md +0 -0
  16. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/pyproject.toml +0 -0
  17. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/__init__.py +0 -0
  18. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/assets/dummy.pdf +0 -0
  19. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/assets/kiwi.png +0 -0
  20. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/assets/marcelo.mp3 +0 -0
  21. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/assets/product_name.txt +0 -0
  22. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/assets/small_video.mp4 +0 -0
  23. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_agent_with_server_not_running.yaml +0 -0
  24. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_agent_with_stdio_server.yaml +0 -0
  25. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_tool_returning_audio_resource.yaml +0 -0
  26. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_tool_returning_audio_resource_link.yaml +0 -0
  27. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_tool_returning_dict.yaml +0 -0
  28. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_tool_returning_error.yaml +0 -0
  29. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_tool_returning_image.yaml +0 -0
  30. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_tool_returning_image_resource.yaml +0 -0
  31. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_tool_returning_image_resource_link.yaml +0 -0
  32. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_tool_returning_multiple_items.yaml +0 -0
  33. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_tool_returning_none.yaml +0 -0
  34. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_tool_returning_str.yaml +0 -0
  35. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_tool_returning_text_resource.yaml +0 -0
  36. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_tool_returning_text_resource_link.yaml +0 -0
  37. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_settings/test_stop_settings[anthropic].yaml +0 -0
  38. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_settings/test_stop_settings[bedrock].yaml +0 -0
  39. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_settings/test_stop_settings[cohere].yaml +0 -0
  40. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_settings/test_stop_settings[gemini].yaml +0 -0
  41. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_settings/test_stop_settings[google].yaml +0 -0
  42. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_settings/test_stop_settings[groq].yaml +0 -0
  43. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_settings/test_stop_settings[mistral].yaml +0 -0
  44. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_settings/test_stop_settings[openai].yaml +0 -0
  45. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/conftest.py +0 -0
  46. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/__init__.py +0 -0
  47. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_evaluator_common.py +0 -0
  48. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_evaluator_context.py +0 -0
  49. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_llm_as_a_judge.py +0 -0
  50. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_otel.py +0 -0
  51. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_render_numbers.py +0 -0
  52. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_utils.py +0 -0
  53. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/utils.py +0 -0
  54. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/example_modules/README.md +0 -0
  55. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/example_modules/bank_database.py +0 -0
  56. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/example_modules/fake_database.py +0 -0
  57. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/example_modules/mcp_server.py +0 -0
  58. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/example_modules/weather_service.py +0 -0
  59. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/ext/__init__.py +0 -0
  60. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/ext/test_langchain.py +0 -0
  61. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/graph/__init__.py +0 -0
  62. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/graph/test_file_persistence.py +0 -0
  63. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/graph/test_graph.py +0 -0
  64. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/graph/test_mermaid.py +0 -0
  65. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/graph/test_persistence.py +0 -0
  66. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/graph/test_state.py +0 -0
  67. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/graph/test_utils.py +0 -0
  68. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/import_examples.py +0 -0
  69. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/json_body_serializer.py +0 -0
  70. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/mcp_server.py +0 -0
  71. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/__init__.py +0 -0
  72. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_anthropic_model_empty_message_on_history.yaml +0 -0
  73. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_anthropic_model_instructions.yaml +0 -0
  74. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_anthropic_model_thinking_part.yaml +0 -0
  75. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_anthropic_model_thinking_part_stream.yaml +0 -0
  76. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_anthropic_prompted_output.yaml +0 -0
  77. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_anthropic_prompted_output_multiple.yaml +0 -0
  78. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_anthropic_text_output_function.yaml +0 -0
  79. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_anthropic_tool_output.yaml +0 -0
  80. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_anthropic_tool_with_thinking.yaml +0 -0
  81. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_document_binary_content_input.yaml +0 -0
  82. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_document_url_input.yaml +0 -0
  83. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_extra_headers.yaml +0 -0
  84. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_image_as_binary_content_tool_response.yaml +0 -0
  85. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_image_url_input.yaml +0 -0
  86. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_image_url_input_invalid_mime_type.yaml +0 -0
  87. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_multiple_parallel_tool_calls.yaml +0 -0
  88. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_text_document_url_input.yaml +0 -0
  89. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_empty_system_prompt.yaml +0 -0
  90. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model.yaml +0 -0
  91. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_anthropic_model_without_tools.yaml +0 -0
  92. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_guardrail_config.yaml +0 -0
  93. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_instructions.yaml +0 -0
  94. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_iter_stream.yaml +0 -0
  95. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_max_tokens.yaml +0 -0
  96. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_other_parameters.yaml +0 -0
  97. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_performance_config.yaml +0 -0
  98. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_retry.yaml +0 -0
  99. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_stream.yaml +0 -0
  100. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_structured_output.yaml +0 -0
  101. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_thinking_part.yaml +0 -0
  102. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_thinking_part_stream.yaml +0 -0
  103. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_top_p.yaml +0 -0
  104. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_multiple_documents_in_history.yaml +0 -0
  105. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_document_url_input.yaml +0 -0
  106. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_image_as_binary_content_input.yaml +0 -0
  107. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_image_url_input.yaml +0 -0
  108. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_text_as_binary_content_input.yaml +0 -0
  109. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_text_document_url_input.yaml +0 -0
  110. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_video_as_binary_content_input.yaml +0 -0
  111. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_video_url_input.yaml +0 -0
  112. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_cohere/test_cohere_model_instructions.yaml +0 -0
  113. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_cohere/test_cohere_model_thinking_part.yaml +0 -0
  114. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_cohere/test_request_simple_success_with_vcr.yaml +0 -0
  115. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_deepseek/test_deepseek_model_thinking_part.yaml +0 -0
  116. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_deepseek/test_deepseek_model_thinking_stream.yaml +0 -0
  117. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_download_item/test_download_item_application_octet_stream.yaml +0 -0
  118. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_download_item/test_download_item_audio_mpeg.yaml +0 -0
  119. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_download_item/test_download_item_no_content_type.yaml +0 -0
  120. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_document_url_input.yaml +0 -0
  121. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_additional_properties_is_false.yaml +0 -0
  122. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_additional_properties_is_true.yaml +0 -0
  123. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_drop_exclusive_maximum.yaml +0 -0
  124. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_exclusive_minimum_and_maximum.yaml +0 -0
  125. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_model_instructions.yaml +0 -0
  126. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_model_thinking_part.yaml +0 -0
  127. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_native_output.yaml +0 -0
  128. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_native_output_multiple.yaml +0 -0
  129. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_prompted_output.yaml +0 -0
  130. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_prompted_output_multiple.yaml +0 -0
  131. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_prompted_output_with_tools.yaml +0 -0
  132. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_text_output_function.yaml +0 -0
  133. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_tool_config_any_with_tool_without_args.yaml +0 -0
  134. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_tool_output.yaml +0 -0
  135. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_youtube_video_url_input.yaml +0 -0
  136. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_image_as_binary_content_input.yaml +0 -0
  137. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_image_as_binary_content_tool_response.yaml +0 -0
  138. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_image_url_input.yaml +0 -0
  139. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_labels_are_ignored_with_gla_provider.yaml +0 -0
  140. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_video_as_binary_content_input.yaml +0 -0
  141. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_video_url_input.yaml +0 -0
  142. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini_vertex/test_labels.yaml +0 -0
  143. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini_vertex/test_url_input[AudioUrl (gs)].yaml +0 -0
  144. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini_vertex/test_url_input[AudioUrl].yaml +0 -0
  145. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini_vertex/test_url_input[DocumentUrl (gs)].yaml +0 -0
  146. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini_vertex/test_url_input[DocumentUrl].yaml +0 -0
  147. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini_vertex/test_url_input[ImageUrl (gs)].yaml +0 -0
  148. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini_vertex/test_url_input[ImageUrl].yaml +0 -0
  149. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini_vertex/test_url_input[VideoUrl (YouTube)].yaml +0 -0
  150. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini_vertex/test_url_input[VideoUrl (gs)].yaml +0 -0
  151. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini_vertex/test_url_input[VideoUrl].yaml +0 -0
  152. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini_vertex/test_url_input_force_download.yaml +0 -0
  153. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model.yaml +0 -0
  154. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_document_url_input.yaml +0 -0
  155. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_empty_assistant_response.yaml +0 -0
  156. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_empty_user_prompt.yaml +0 -0
  157. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_image_as_binary_content_input.yaml +0 -0
  158. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_image_url_input.yaml +0 -0
  159. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_instructions.yaml +0 -0
  160. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_iter_stream.yaml +0 -0
  161. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_max_tokens.yaml +0 -0
  162. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_multiple_documents_in_history.yaml +0 -0
  163. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_retry.yaml +0 -0
  164. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_safety_settings.yaml +0 -0
  165. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_stream.yaml +0 -0
  166. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_structured_output.yaml +0 -0
  167. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_text_as_binary_content_input.yaml +0 -0
  168. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_text_document_url_input.yaml +0 -0
  169. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_thinking_config.yaml +0 -0
  170. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_thinking_part.yaml +0 -0
  171. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_thinking_part_iter.yaml +0 -0
  172. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_top_p.yaml +0 -0
  173. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_vertex_labels.yaml +0 -0
  174. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_vertex_provider.yaml +0 -0
  175. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_video_as_binary_content_input.yaml +0 -0
  176. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_video_as_binary_content_input_with_vendor_metadata.yaml +0 -0
  177. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_video_url_input.yaml +0 -0
  178. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_youtube_video_url_input_with_vendor_metadata.yaml +0 -0
  179. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_native_output.yaml +0 -0
  180. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_native_output_multiple.yaml +0 -0
  181. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_prompted_output.yaml +0 -0
  182. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_prompted_output_multiple.yaml +0 -0
  183. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_prompted_output_with_tools.yaml +0 -0
  184. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_text_output_function.yaml +0 -0
  185. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_timeout.yaml +0 -0
  186. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_tool_config_any_with_tool_without_args.yaml +0 -0
  187. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_tool_output.yaml +0 -0
  188. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_url_input[AudioUrl (gs)].yaml +0 -0
  189. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_url_input[AudioUrl].yaml +0 -0
  190. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_url_input[DocumentUrl (gs)].yaml +0 -0
  191. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_url_input[DocumentUrl].yaml +0 -0
  192. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_url_input[ImageUrl (gs)].yaml +0 -0
  193. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_url_input[ImageUrl].yaml +0 -0
  194. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_url_input[VideoUrl (YouTube)].yaml +0 -0
  195. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_url_input[VideoUrl (gs)].yaml +0 -0
  196. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_url_input[VideoUrl].yaml +0 -0
  197. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_url_input_force_download.yaml +0 -0
  198. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_groq/test_extra_headers.yaml +0 -0
  199. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_groq/test_groq_model_instructions.yaml +0 -0
  200. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_groq/test_groq_model_thinking_part.yaml +0 -0
  201. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_groq/test_groq_model_thinking_part_iter.yaml +0 -0
  202. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_groq/test_image_as_binary_content_input.yaml +0 -0
  203. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_groq/test_image_as_binary_content_tool_response.yaml +0 -0
  204. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_groq/test_image_url_input.yaml +0 -0
  205. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_huggingface/test_hf_model_instructions.yaml +0 -0
  206. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_huggingface/test_hf_model_thinking_part.yaml +0 -0
  207. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_huggingface/test_hf_model_thinking_part_iter.yaml +0 -0
  208. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_huggingface/test_image_as_binary_content_input.yaml +0 -0
  209. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_huggingface/test_image_url_input.yaml +0 -0
  210. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_huggingface/test_max_completion_tokens[Qwen-Qwen2.5-72B-Instruct].yaml +0 -0
  211. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_huggingface/test_max_completion_tokens[deepseek-ai-DeepSeek-R1-0528].yaml +0 -0
  212. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_huggingface/test_max_completion_tokens[meta-llama-Llama-3.3-70B-Instruct].yaml +0 -0
  213. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_huggingface/test_request_simple_success_with_vcr.yaml +0 -0
  214. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_huggingface/test_request_simple_usage.yaml +0 -0
  215. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_huggingface/test_simple_completion.yaml +0 -0
  216. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_huggingface/test_stream_completion.yaml +0 -0
  217. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_mistral/test_image_as_binary_content_tool_response.yaml +0 -0
  218. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_mistral/test_mistral_model_instructions.yaml +0 -0
  219. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_mistral/test_mistral_model_thinking_part.yaml +0 -0
  220. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_model_names/test_known_model_names.yaml +0 -0
  221. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_audio_as_binary_content_input.yaml +0 -0
  222. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_compatible_api_with_tool_calls_without_id.yaml +0 -0
  223. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_document_as_binary_content_input.yaml +0 -0
  224. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_document_url_input.yaml +0 -0
  225. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_extra_headers.yaml +0 -0
  226. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_image_as_binary_content_input.yaml +0 -0
  227. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_image_as_binary_content_tool_response.yaml +0 -0
  228. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_image_url_tool_response.yaml +0 -0
  229. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_invalid_response.yaml +0 -0
  230. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_max_completion_tokens[gpt-4.5-preview].yaml +0 -0
  231. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_max_completion_tokens[gpt-4o-mini].yaml +0 -0
  232. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_max_completion_tokens[o3-mini].yaml +0 -0
  233. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_multiple_agent_tool_calls.yaml +0 -0
  234. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_audio_url_input.yaml +0 -0
  235. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_instructions.yaml +0 -0
  236. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_instructions_with_tool_calls_keep_instructions.yaml +0 -0
  237. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_model_thinking_part.yaml +0 -0
  238. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_model_thinking_part_iter.yaml +0 -0
  239. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_model_without_system_prompt.yaml +0 -0
  240. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_native_output.yaml +0 -0
  241. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_native_output_multiple.yaml +0 -0
  242. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_o1_mini_system_role[developer].yaml +0 -0
  243. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_o1_mini_system_role[system].yaml +0 -0
  244. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_prompted_output.yaml +0 -0
  245. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_prompted_output_multiple.yaml +0 -0
  246. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_responses_model_thinking_part.yaml +0 -0
  247. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_text_output_function.yaml +0 -0
  248. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_tool_output.yaml +0 -0
  249. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_reasoning_model_with_temperature.yaml +0 -0
  250. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_text_response.yaml +0 -0
  251. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_user_id.yaml +0 -0
  252. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_valid_response.yaml +0 -0
  253. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_audio_as_binary_content_input.yaml +0 -0
  254. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_image_as_binary_content_input.yaml +0 -0
  255. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_image_as_binary_content_tool_response.yaml +0 -0
  256. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_native_output.yaml +0 -0
  257. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_native_output_multiple.yaml +0 -0
  258. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_document_as_binary_content_input.yaml +0 -0
  259. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_document_url_input.yaml +0 -0
  260. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_image_url_input.yaml +0 -0
  261. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_builtin_tools.yaml +0 -0
  262. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_http_error.yaml +0 -0
  263. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_instructions.yaml +0 -0
  264. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_retry.yaml +0 -0
  265. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_simple_response.yaml +0 -0
  266. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_simple_response_with_tool_call.yaml +0 -0
  267. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_output_type.yaml +0 -0
  268. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_reasoning_effort.yaml +0 -0
  269. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_reasoning_generate_summary.yaml +0 -0
  270. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_stream.yaml +0 -0
  271. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_system_prompt.yaml +0 -0
  272. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_text_document_url_input.yaml +0 -0
  273. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_prompted_output.yaml +0 -0
  274. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_prompted_output_multiple.yaml +0 -0
  275. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_reasoning_model_with_temperature.yaml +0 -0
  276. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_text_output_function.yaml +0 -0
  277. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_tool_output.yaml +0 -0
  278. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/mock_async_stream.py +0 -0
  279. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_anthropic.py +0 -0
  280. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_bedrock.py +0 -0
  281. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_cohere.py +0 -0
  282. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_deepseek.py +0 -0
  283. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_download_item.py +0 -0
  284. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_fallback.py +0 -0
  285. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_gemini.py +0 -0
  286. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_gemini_vertex.py +0 -0
  287. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_google.py +0 -0
  288. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_groq.py +0 -0
  289. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_huggingface.py +0 -0
  290. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_instrumented.py +0 -0
  291. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_mcp_sampling.py +0 -0
  292. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_mistral.py +0 -0
  293. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_model.py +0 -0
  294. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_model_function.py +0 -0
  295. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_model_names.py +0 -0
  296. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_model_request_parameters.py +0 -0
  297. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_model_settings.py +0 -0
  298. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_model_test.py +0 -0
  299. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_openai_responses.py +0 -0
  300. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/__init__.py +0 -0
  301. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/cassettes/test_azure/test_azure_provider_call.yaml +0 -0
  302. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/cassettes/test_google_vertex/test_vertexai_provider.yaml +0 -0
  303. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/cassettes/test_heroku/test_heroku_model_provider_claude_3_7_sonnet.yaml +0 -0
  304. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/cassettes/test_openrouter/test_openrouter_with_google_model.yaml +0 -0
  305. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_anthropic.py +0 -0
  306. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_azure.py +0 -0
  307. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_bedrock.py +0 -0
  308. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_cohere.py +0 -0
  309. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_deepseek.py +0 -0
  310. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_fireworks.py +0 -0
  311. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_github.py +0 -0
  312. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_google_gla.py +0 -0
  313. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_google_vertex.py +0 -0
  314. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_grok.py +0 -0
  315. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_groq.py +0 -0
  316. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_heroku.py +0 -0
  317. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_huggingface.py +0 -0
  318. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_mistral.py +0 -0
  319. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_moonshotai.py +0 -0
  320. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_openai.py +0 -0
  321. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_openrouter.py +0 -0
  322. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_provider_names.py +0 -0
  323. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_together.py +0 -0
  324. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_vercel.py +0 -0
  325. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_a2a.py +0 -0
  326. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_ag_ui.py +0 -0
  327. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_cli.py +0 -0
  328. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_deps.py +0 -0
  329. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_direct.py +0 -0
  330. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_examples.py +0 -0
  331. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_format_as_xml.py +0 -0
  332. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_history_processor.py +0 -0
  333. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_json_body_serializer.py +0 -0
  334. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_live.py +0 -0
  335. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_logfire.py +0 -0
  336. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_mcp.py +0 -0
  337. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_parts_manager.py +0 -0
  338. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_settings.py +0 -0
  339. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_streaming.py +0 -0
  340. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_tenacity.py +0 -0
  341. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_thinking_part.py +0 -0
  342. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_toolsets.py +0 -0
  343. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_usage_limits.py +0 -0
  344. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_utils.py +0 -0
  345. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/typed_agent.py +0 -0
  346. {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/typed_graph.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pydantic-ai
3
- Version: 0.4.11
3
+ Version: 0.5.0
4
4
  Summary: Agent Framework / shim to use Pydantic with LLMs
5
5
  Project-URL: Homepage, https://ai.pydantic.dev
6
6
  Project-URL: Source, https://github.com/pydantic/pydantic-ai
@@ -28,11 +28,11 @@ Classifier: Topic :: Internet
28
28
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
29
29
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
30
30
  Requires-Python: >=3.9
31
- Requires-Dist: pydantic-ai-slim[ag-ui,anthropic,bedrock,cli,cohere,evals,google,groq,huggingface,mcp,mistral,openai,retries,vertexai]==0.4.11
31
+ Requires-Dist: pydantic-ai-slim[ag-ui,anthropic,bedrock,cli,cohere,evals,google,groq,huggingface,mcp,mistral,openai,retries,vertexai]==0.5.0
32
32
  Provides-Extra: a2a
33
33
  Requires-Dist: fasta2a>=0.4.1; extra == 'a2a'
34
34
  Provides-Extra: examples
35
- Requires-Dist: pydantic-ai-examples==0.4.11; extra == 'examples'
35
+ Requires-Dist: pydantic-ai-examples==0.5.0; extra == 'examples'
36
36
  Provides-Extra: logfire
37
37
  Requires-Dist: logfire>=3.11.0; extra == 'logfire'
38
38
  Description-Content-Type: text/markdown
@@ -9,7 +9,7 @@ from typing import Any
9
9
  import pytest
10
10
  from dirty_equals import HasRepr, IsNumber
11
11
  from inline_snapshot import snapshot
12
- from pydantic import BaseModel
12
+ from pydantic import BaseModel, TypeAdapter
13
13
 
14
14
  from ..conftest import IsStr, try_import
15
15
  from .utils import render_table
@@ -20,7 +20,7 @@ with try_import() as imports_successful:
20
20
 
21
21
  from pydantic_evals import Case, Dataset
22
22
  from pydantic_evals.dataset import increment_eval_metric, set_eval_attribute
23
- from pydantic_evals.evaluators import EvaluationResult, Evaluator, EvaluatorOutput, LLMJudge, Python
23
+ from pydantic_evals.evaluators import EvaluationResult, Evaluator, EvaluatorOutput, EvaluatorSpec, LLMJudge, Python
24
24
  from pydantic_evals.evaluators.context import EvaluatorContext
25
25
 
26
26
  @dataclass
@@ -32,7 +32,7 @@ with try_import() as imports_successful:
32
32
  def evaluate(self, ctx: EvaluatorContext[object, object, object]) -> EvaluatorOutput:
33
33
  return self.output
34
34
 
35
- from pydantic_evals.reporting import ReportCase, ReportCaseAdapter
35
+ from pydantic_evals.reporting import EvaluationReport, ReportCase, ReportCaseAdapter
36
36
 
37
37
  pytestmark = [pytest.mark.skipif(not imports_successful(), reason='pydantic-evals not installed'), pytest.mark.anyio]
38
38
 
@@ -456,13 +456,13 @@ async def test_repeated_name_outputs(example_dataset: Dataset[TaskInput, TaskOut
456
456
  scores={},
457
457
  labels={
458
458
  'output': EvaluationResult(
459
- name='output', value='a', reason=None, source=MockEvaluator(output={'output': 'a'})
459
+ name='output', value='a', reason=None, source=MockEvaluator(output={'output': 'a'}).as_spec()
460
460
  ),
461
461
  'output_2': EvaluationResult(
462
- name='output', value='b', reason=None, source=MockEvaluator(output={'output': 'b'})
462
+ name='output', value='b', reason=None, source=MockEvaluator(output={'output': 'b'}).as_spec()
463
463
  ),
464
464
  'output_3': EvaluationResult(
465
- name='output', value='c', reason=None, source=MockEvaluator(output={'output': 'c'})
465
+ name='output', value='c', reason=None, source=MockEvaluator(output={'output': 'c'}).as_spec()
466
466
  ),
467
467
  },
468
468
  assertions={},
@@ -482,13 +482,13 @@ async def test_repeated_name_outputs(example_dataset: Dataset[TaskInput, TaskOut
482
482
  scores={},
483
483
  labels={
484
484
  'output': EvaluationResult(
485
- name='output', value='a', reason=None, source=MockEvaluator(output={'output': 'a'})
485
+ name='output', value='a', reason=None, source=MockEvaluator(output={'output': 'a'}).as_spec()
486
486
  ),
487
487
  'output_2': EvaluationResult(
488
- name='output', value='b', reason=None, source=MockEvaluator(output={'output': 'b'})
488
+ name='output', value='b', reason=None, source=MockEvaluator(output={'output': 'b'}).as_spec()
489
489
  ),
490
490
  'output_3': EvaluationResult(
491
- name='output', value='c', reason=None, source=MockEvaluator(output={'output': 'c'})
491
+ name='output', value='c', reason=None, source=MockEvaluator(output={'output': 'c'}).as_spec()
492
492
  ),
493
493
  },
494
494
  assertions={},
@@ -501,6 +501,73 @@ async def test_repeated_name_outputs(example_dataset: Dataset[TaskInput, TaskOut
501
501
  )
502
502
 
503
503
 
504
+ async def test_report_round_trip_serialization(example_dataset: Dataset[TaskInput, TaskOutput, TaskMetadata]):
505
+ """Test the increment_eval_metric function."""
506
+
507
+ async def my_task(inputs: TaskInput) -> TaskOutput:
508
+ return TaskOutput(answer=f'answer to {inputs.query}')
509
+
510
+ example_dataset.add_evaluator(MockEvaluator({'output': 'a'}))
511
+
512
+ report = await example_dataset.evaluate(my_task)
513
+ assert report == snapshot(
514
+ EvaluationReport(
515
+ name='my_task',
516
+ cases=[
517
+ ReportCase(
518
+ name='case1',
519
+ inputs=TaskInput(query='What is 2+2?'),
520
+ metadata=TaskMetadata(difficulty='easy', category='general'),
521
+ expected_output=TaskOutput(answer='4', confidence=1.0),
522
+ output=TaskOutput(answer='answer to What is 2+2?', confidence=1.0),
523
+ metrics={},
524
+ attributes={},
525
+ scores={},
526
+ labels={
527
+ 'output': EvaluationResult(
528
+ name='output',
529
+ value='a',
530
+ reason=None,
531
+ source=EvaluatorSpec(name='MockEvaluator', arguments=({'output': 'a'},)),
532
+ )
533
+ },
534
+ assertions={},
535
+ task_duration=1.0,
536
+ total_duration=6.0,
537
+ trace_id='00000000000000000000000000000001',
538
+ span_id='0000000000000003',
539
+ ),
540
+ ReportCase(
541
+ name='case2',
542
+ inputs=TaskInput(query='What is the capital of France?'),
543
+ metadata=TaskMetadata(difficulty='medium', category='geography'),
544
+ expected_output=TaskOutput(answer='Paris', confidence=1.0),
545
+ output=TaskOutput(answer='answer to What is the capital of France?', confidence=1.0),
546
+ metrics={},
547
+ attributes={},
548
+ scores={},
549
+ labels={
550
+ 'output': EvaluationResult(
551
+ name='output',
552
+ value='a',
553
+ reason=None,
554
+ source=EvaluatorSpec(name='MockEvaluator', arguments=({'output': 'a'},)),
555
+ )
556
+ },
557
+ assertions={},
558
+ task_duration=1.0,
559
+ total_duration=4.0,
560
+ trace_id='00000000000000000000000000000001',
561
+ span_id='0000000000000007',
562
+ ),
563
+ ],
564
+ )
565
+ )
566
+
567
+ report_adapter = TypeAdapter(EvaluationReport[TaskInput, TaskOutput, TaskMetadata])
568
+ assert report == report_adapter.validate_json(report_adapter.dump_json(report, indent=2))
569
+
570
+
504
571
  async def test_genai_attribute_collection(example_dataset: Dataset[TaskInput, TaskOutput, TaskMetadata]):
505
572
  async def my_task(inputs: TaskInput) -> TaskOutput:
506
573
  with logfire.span(
@@ -52,11 +52,11 @@ def test_evaluation_result():
52
52
  evaluator = DummyEvaluator()
53
53
 
54
54
  # Test basic result
55
- result = EvaluationResult(name='test', value=True, reason='Success', source=evaluator)
55
+ result = EvaluationResult(name='test', value=True, reason='Success', source=evaluator.as_spec())
56
56
  assert result.name == 'test'
57
57
  assert result.value is True
58
58
  assert result.reason == 'Success'
59
- assert result.source == evaluator
59
+ assert result.source == evaluator.as_spec()
60
60
 
61
61
  # Test downcast with matching type
62
62
  downcast = result.downcast(bool)
@@ -6,7 +6,7 @@ from pydantic import ValidationError
6
6
  from ..conftest import try_import
7
7
 
8
8
  with try_import() as imports_successful:
9
- from pydantic_evals.evaluators._spec import (
9
+ from pydantic_evals.evaluators.spec import (
10
10
  EvaluatorSpec,
11
11
  _SerializedEvaluatorSpec, # pyright: ignore[reportPrivateUsage]
12
12
  )
@@ -19,7 +19,6 @@ with try_import() as imports_successful:
19
19
  from logfire.testing import CaptureLogfire
20
20
 
21
21
  from pydantic_evals.evaluators._run_evaluator import run_evaluator
22
- from pydantic_evals.evaluators._spec import EvaluatorSpec
23
22
  from pydantic_evals.evaluators.common import (
24
23
  Contains,
25
24
  Equals,
@@ -36,6 +35,7 @@ with try_import() as imports_successful:
36
35
  Evaluator,
37
36
  EvaluatorOutput,
38
37
  )
38
+ from pydantic_evals.evaluators.spec import EvaluatorSpec
39
39
  from pydantic_evals.otel._context_in_memory_span_exporter import context_subtree
40
40
  from pydantic_evals.otel.span_tree import SpanQuery, SpanTree
41
41
 
@@ -162,7 +162,7 @@ async def test_evaluator_call(test_context: EvaluatorContext[TaskInput, TaskOutp
162
162
  assert results[0].name == 'result'
163
163
  assert results[0].value == 'passed'
164
164
  assert results[0].reason is None
165
- assert results[0].source is evaluator
165
+ assert results[0].source == EvaluatorSpec(name='ExampleEvaluator', arguments=None)
166
166
 
167
167
 
168
168
  async def test_is_instance_evaluator():
@@ -242,7 +242,14 @@ async def test_custom_evaluator_name(test_context: EvaluatorContext[TaskInput, T
242
242
  evaluator = CustomNameFieldEvaluator(result=123, evaluation_name='abc')
243
243
 
244
244
  assert to_jsonable_python(await run_evaluator(evaluator, test_context)) == snapshot(
245
- [{'name': 'abc', 'reason': None, 'source': {'evaluation_name': 'abc', 'result': 123}, 'value': 123}]
245
+ [
246
+ {
247
+ 'name': 'abc',
248
+ 'reason': None,
249
+ 'source': {'arguments': {'evaluation_name': 'abc', 'result': 123}, 'name': 'CustomNameFieldEvaluator'},
250
+ 'value': 123,
251
+ }
252
+ ]
246
253
  )
247
254
 
248
255
  @dataclass
@@ -260,7 +267,14 @@ async def test_custom_evaluator_name(test_context: EvaluatorContext[TaskInput, T
260
267
  evaluator = CustomNamePropertyEvaluator(result=123, my_name='marcelo')
261
268
 
262
269
  assert to_jsonable_python(await run_evaluator(evaluator, test_context)) == snapshot(
263
- [{'name': 'hello marcelo', 'reason': None, 'source': {'my_name': 'marcelo', 'result': 123}, 'value': 123}]
270
+ [
271
+ {
272
+ 'name': 'hello marcelo',
273
+ 'reason': None,
274
+ 'source': {'arguments': {'my_name': 'marcelo', 'result': 123}, 'name': 'CustomNamePropertyEvaluator'},
275
+ 'value': 123,
276
+ }
277
+ ]
264
278
  )
265
279
 
266
280
 
@@ -48,7 +48,7 @@ def sample_assertion(mock_evaluator: Evaluator[TaskInput, TaskOutput, TaskMetada
48
48
  name='MockEvaluator',
49
49
  value=True,
50
50
  reason=None,
51
- source=mock_evaluator,
51
+ source=mock_evaluator.as_spec(),
52
52
  )
53
53
 
54
54
 
@@ -57,8 +57,8 @@ def sample_score(mock_evaluator: Evaluator[TaskInput, TaskOutput, TaskMetadata])
57
57
  return EvaluationResult(
58
58
  name='MockEvaluator',
59
59
  value=2.5,
60
- reason=None,
61
- source=mock_evaluator,
60
+ reason='my reason',
61
+ source=mock_evaluator.as_spec(),
62
62
  )
63
63
 
64
64
 
@@ -68,7 +68,7 @@ def sample_label(mock_evaluator: Evaluator[TaskInput, TaskOutput, TaskMetadata])
68
68
  name='MockEvaluator',
69
69
  value='hello',
70
70
  reason=None,
71
- source=mock_evaluator,
71
+ source=mock_evaluator.as_spec(),
72
72
  )
73
73
 
74
74
 
@@ -120,6 +120,7 @@ async def test_evaluation_renderer_basic(sample_report: EvaluationReport):
120
120
  label_configs={},
121
121
  metric_configs={},
122
122
  duration_config={},
123
+ include_reasons=False,
123
124
  )
124
125
 
125
126
  table = renderer.build_table(sample_report)
@@ -137,6 +138,43 @@ async def test_evaluation_renderer_basic(sample_report: EvaluationReport):
137
138
  """)
138
139
 
139
140
 
141
+ async def test_evaluation_renderer_with_reasons(sample_report: EvaluationReport):
142
+ """Test basic functionality of EvaluationRenderer."""
143
+ renderer = EvaluationRenderer(
144
+ include_input=True,
145
+ include_output=True,
146
+ include_metadata=True,
147
+ include_expected_output=True,
148
+ include_durations=True,
149
+ include_total_duration=True,
150
+ include_removed_cases=False,
151
+ include_averages=True,
152
+ input_config={},
153
+ metadata_config={},
154
+ output_config={},
155
+ score_configs={},
156
+ label_configs={},
157
+ metric_configs={},
158
+ duration_config={},
159
+ include_reasons=True,
160
+ )
161
+
162
+ table = renderer.build_table(sample_report)
163
+ assert render_table(table) == snapshot("""\
164
+ Evaluation Summary: test_report
165
+ ┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓
166
+ ┃ Case ID ┃ Inputs ┃ Metadata ┃ Expected Output ┃ Outputs ┃ Scores ┃ Labels ┃ Metrics ┃ Assertions ┃ Durations ┃
167
+ ┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━┩
168
+ │ test_case │ {'query': 'What is 2+2?'} │ {'difficulty': 'easy'} │ {'answer': '4'} │ {'answer': '4'} │ score1: 2.50 │ label1: hello │ accuracy: 0.950 │ MockEvaluator: ✔ │ task: 0.100 │
169
+ │ │ │ │ │ │ Reason: my reason │ │ │ │ total: 0.200 │
170
+ │ │ │ │ │ │ │ │ │ │ │
171
+ ├───────────┼───────────────────────────┼────────────────────────┼─────────────────┼─────────────────┼─────────────────────┼────────────────────────┼─────────────────┼──────────────────┼──────────────┤
172
+ │ Averages │ │ │ │ │ score1: 2.50 │ label1: {'hello': 1.0} │ accuracy: 0.950 │ 100.0% ✔ │ task: 0.100 │
173
+ │ │ │ │ │ │ │ │ │ │ total: 0.200 │
174
+ └───────────┴───────────────────────────┴────────────────────────┴─────────────────┴─────────────────┴─────────────────────┴────────────────────────┴─────────────────┴──────────────────┴──────────────┘
175
+ """)
176
+
177
+
140
178
  async def test_evaluation_renderer_with_baseline(sample_report: EvaluationReport):
141
179
  """Test EvaluationRenderer with baseline comparison."""
142
180
  baseline_report = EvaluationReport(
@@ -191,20 +229,21 @@ async def test_evaluation_renderer_with_baseline(sample_report: EvaluationReport
191
229
  label_configs={},
192
230
  metric_configs={},
193
231
  duration_config={},
232
+ include_reasons=False,
194
233
  )
195
234
 
196
235
  table = renderer.build_diff_table(sample_report, baseline_report)
197
236
  assert render_table(table) == snapshot("""\
198
- Evaluation Diff: baseline_report → test_report
199
- ┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
200
- ┃ Case ID ┃ Inputs ┃ Metadata ┃ Expected Output ┃ Outputs ┃ Scores ┃ Labels ┃ Metrics ┃ Assertions ┃ Durations ┃
201
- ┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
202
- │ test_case │ {'query': 'What is 2+2?'} │ {'difficulty': 'easy'} │ {'answer': '4'} │ {'answer': '4'} │ score1: 2.50 │ label1: EvaluationResult(name='MockEvaluator', value='hello', reason=None, │ accuracy: 0.900 → 0.950 (+0.05 / +5.6%) │ → ✔ │ task: 0.150 → 0.100 (-0.05 / -33.3%) │
203
- │ │ │ │ │ │ │ source=mock_evaluator.<locals>.MockEvaluator()) │ │ │ total: 0.250 → 0.200 (-0.05 / -20.0%) │
204
- ├───────────┼───────────────────────────┼────────────────────────┼─────────────────┼─────────────────┼──────────────┼─────────────────────────────────────────────────────────────────────────────────────┼─────────────────────────────────────────┼──────────────┼───────────────────────────────────────┤
205
- │ Averages │ │ │ │ │ score1: 2.50 │ label1: {'hello': 1.0} │ accuracy: 0.900 → 0.950 (+0.05 / +5.6%) │ - → 100.0% ✔ │ task: 0.150 → 0.100 (-0.05 / -33.3%) │
206
- │ │ │ │ │ │ │ │ │ │ total: 0.250 → 0.200 (-0.05 / -20.0%) │
207
- └───────────┴───────────────────────────┴────────────────────────┴─────────────────┴─────────────────┴──────────────┴─────────────────────────────────────────────────────────────────────────────────────┴─────────────────────────────────────────┴──────────────┴───────────────────────────────────────┘
237
+ Evaluation Diff: baseline_report → test_report
238
+ ┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
239
+ ┃ Case ID ┃ Inputs ┃ Metadata ┃ Expected Output ┃ Outputs ┃ Scores ┃ Labels ┃ Metrics ┃ Assertions ┃ Durations ┃
240
+ ┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
241
+ │ test_case │ {'query': 'What is 2+2?'} │ {'difficulty': 'easy'} │ {'answer': '4'} │ {'answer': '4'} │ score1: 2.50 │ label1: hello │ accuracy: 0.900 → 0.950 (+0.05 / +5.6%) │ → ✔ │ task: 0.150 → 0.100 (-0.05 / -33.3%) │
242
+ │ │ │ │ │ │ │ │ │ │ total: 0.250 → 0.200 (-0.05 / -20.0%) │
243
+ ├───────────┼───────────────────────────┼────────────────────────┼─────────────────┼─────────────────┼──────────────┼────────────────────────┼─────────────────────────────────────────┼──────────────┼───────────────────────────────────────┤
244
+ │ Averages │ │ │ │ │ score1: 2.50 │ label1: {'hello': 1.0} │ accuracy: 0.900 → 0.950 (+0.05 / +5.6%) │ - → 100.0% ✔ │ task: 0.150 → 0.100 (-0.05 / -33.3%) │
245
+ │ │ │ │ │ │ │ │ │ │ total: 0.250 → 0.200 (-0.05 / -20.0%) │
246
+ └───────────┴───────────────────────────┴────────────────────────┴─────────────────┴─────────────────┴──────────────┴────────────────────────┴─────────────────────────────────────────┴──────────────┴───────────────────────────────────────┘
208
247
  """)
209
248
 
210
249
 
@@ -248,6 +287,7 @@ async def test_evaluation_renderer_with_removed_cases(sample_report: EvaluationR
248
287
  label_configs={},
249
288
  metric_configs={},
250
289
  duration_config={},
290
+ include_reasons=False,
251
291
  )
252
292
 
253
293
  table = renderer.build_diff_table(sample_report, baseline_report)
@@ -311,6 +351,7 @@ async def test_evaluation_renderer_with_custom_configs(sample_report: Evaluation
311
351
  'diff_increase_style': 'bold red',
312
352
  'diff_decrease_style': 'bold green',
313
353
  },
354
+ include_reasons=False,
314
355
  )
315
356
 
316
357
  table = renderer.build_table(sample_report)
@@ -350,7 +391,7 @@ async def test_report_case_aggregate_average():
350
391
  name='MockEvaluator',
351
392
  value=0.8,
352
393
  reason=None,
353
- source=MockEvaluator(),
394
+ source=MockEvaluator().as_spec(),
354
395
  )
355
396
  },
356
397
  labels={
@@ -358,7 +399,7 @@ async def test_report_case_aggregate_average():
358
399
  name='MockEvaluator',
359
400
  value='good',
360
401
  reason=None,
361
- source=MockEvaluator(),
402
+ source=MockEvaluator().as_spec(),
362
403
  )
363
404
  },
364
405
  assertions={
@@ -366,7 +407,7 @@ async def test_report_case_aggregate_average():
366
407
  name='MockEvaluator',
367
408
  value=True,
368
409
  reason=None,
369
- source=MockEvaluator(),
410
+ source=MockEvaluator().as_spec(),
370
411
  )
371
412
  },
372
413
  task_duration=0.1,
@@ -387,7 +428,7 @@ async def test_report_case_aggregate_average():
387
428
  name='MockEvaluator',
388
429
  value=0.7,
389
430
  reason=None,
390
- source=MockEvaluator(),
431
+ source=MockEvaluator().as_spec(),
391
432
  )
392
433
  },
393
434
  labels={
@@ -395,7 +436,7 @@ async def test_report_case_aggregate_average():
395
436
  name='MockEvaluator',
396
437
  value='good',
397
438
  reason=None,
398
- source=MockEvaluator(),
439
+ source=MockEvaluator().as_spec(),
399
440
  )
400
441
  },
401
442
  assertions={
@@ -403,7 +444,7 @@ async def test_report_case_aggregate_average():
403
444
  name='MockEvaluator',
404
445
  value=False,
405
446
  reason=None,
406
- source=MockEvaluator(),
447
+ source=MockEvaluator().as_spec(),
407
448
  )
408
449
  },
409
450
  task_duration=0.15,
@@ -57,7 +57,7 @@ def sample_evaluation_result(
57
57
  name='MockEvaluator',
58
58
  value=True,
59
59
  reason=None,
60
- source=mock_evaluator,
60
+ source=mock_evaluator.as_spec(),
61
61
  )
62
62
 
63
63
 
@@ -177,7 +177,7 @@ async def test_report_with_error(mock_evaluator: Evaluator[TaskInput, TaskOutput
177
177
  name='error_evaluator',
178
178
  value=False, # No result
179
179
  reason='Test error message',
180
- source=mock_evaluator,
180
+ source=mock_evaluator.as_spec(),
181
181
  )
182
182
 
183
183
  # Create a case