pydantic-ai 0.4.11__tar.gz → 0.5.0__tar.gz
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as published in their public registry.
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/PKG-INFO +3 -3
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_dataset.py +76 -9
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_evaluator_base.py +2 -2
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_evaluator_spec.py +1 -1
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_evaluators.py +18 -4
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_reporting.py +61 -20
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_reports.py +2 -2
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_openai.py +231 -24
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_agent.py +52 -3
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_messages.py +23 -5
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_tools.py +6 -6
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/.gitignore +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/LICENSE +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/Makefile +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/README.md +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/pyproject.toml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/__init__.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/assets/dummy.pdf +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/assets/kiwi.png +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/assets/marcelo.mp3 +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/assets/product_name.txt +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/assets/small_video.mp4 +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_agent_with_server_not_running.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_agent_with_stdio_server.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_tool_returning_audio_resource.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_tool_returning_audio_resource_link.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_tool_returning_dict.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_tool_returning_error.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_tool_returning_image.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_tool_returning_image_resource.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_tool_returning_image_resource_link.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_tool_returning_multiple_items.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_tool_returning_none.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_tool_returning_str.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_tool_returning_text_resource.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_mcp/test_tool_returning_text_resource_link.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_settings/test_stop_settings[anthropic].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_settings/test_stop_settings[bedrock].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_settings/test_stop_settings[cohere].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_settings/test_stop_settings[gemini].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_settings/test_stop_settings[google].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_settings/test_stop_settings[groq].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_settings/test_stop_settings[mistral].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/cassettes/test_settings/test_stop_settings[openai].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/conftest.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/__init__.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_evaluator_common.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_evaluator_context.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_llm_as_a_judge.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_otel.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_render_numbers.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_utils.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/utils.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/example_modules/README.md +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/example_modules/bank_database.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/example_modules/fake_database.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/example_modules/mcp_server.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/example_modules/weather_service.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/ext/__init__.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/ext/test_langchain.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/graph/__init__.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/graph/test_file_persistence.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/graph/test_graph.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/graph/test_mermaid.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/graph/test_persistence.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/graph/test_state.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/graph/test_utils.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/import_examples.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/json_body_serializer.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/mcp_server.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/__init__.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_anthropic_model_empty_message_on_history.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_anthropic_model_instructions.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_anthropic_model_thinking_part.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_anthropic_model_thinking_part_stream.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_anthropic_prompted_output.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_anthropic_prompted_output_multiple.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_anthropic_text_output_function.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_anthropic_tool_output.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_anthropic_tool_with_thinking.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_document_binary_content_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_document_url_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_extra_headers.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_image_as_binary_content_tool_response.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_image_url_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_image_url_input_invalid_mime_type.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_multiple_parallel_tool_calls.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_anthropic/test_text_document_url_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_empty_system_prompt.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_anthropic_model_without_tools.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_guardrail_config.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_instructions.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_iter_stream.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_max_tokens.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_other_parameters.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_performance_config.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_retry.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_stream.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_structured_output.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_thinking_part.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_thinking_part_stream.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_top_p.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_bedrock_multiple_documents_in_history.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_document_url_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_image_url_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_text_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_text_document_url_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_video_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_bedrock/test_video_url_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_cohere/test_cohere_model_instructions.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_cohere/test_cohere_model_thinking_part.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_cohere/test_request_simple_success_with_vcr.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_deepseek/test_deepseek_model_thinking_part.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_deepseek/test_deepseek_model_thinking_stream.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_download_item/test_download_item_application_octet_stream.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_download_item/test_download_item_audio_mpeg.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_download_item/test_download_item_no_content_type.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_document_url_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_additional_properties_is_false.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_additional_properties_is_true.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_drop_exclusive_maximum.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_exclusive_minimum_and_maximum.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_model_instructions.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_model_thinking_part.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_native_output.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_native_output_multiple.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_prompted_output.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_prompted_output_multiple.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_prompted_output_with_tools.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_text_output_function.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_tool_config_any_with_tool_without_args.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_tool_output.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_gemini_youtube_video_url_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_image_as_binary_content_tool_response.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_image_url_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_labels_are_ignored_with_gla_provider.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_video_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini/test_video_url_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini_vertex/test_labels.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini_vertex/test_url_input[AudioUrl (gs)].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini_vertex/test_url_input[AudioUrl].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini_vertex/test_url_input[DocumentUrl (gs)].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini_vertex/test_url_input[DocumentUrl].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini_vertex/test_url_input[ImageUrl (gs)].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini_vertex/test_url_input[ImageUrl].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini_vertex/test_url_input[VideoUrl (YouTube)].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini_vertex/test_url_input[VideoUrl (gs)].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini_vertex/test_url_input[VideoUrl].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_gemini_vertex/test_url_input_force_download.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_document_url_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_empty_assistant_response.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_empty_user_prompt.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_image_url_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_instructions.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_iter_stream.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_max_tokens.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_multiple_documents_in_history.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_retry.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_safety_settings.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_stream.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_structured_output.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_text_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_text_document_url_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_thinking_config.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_thinking_part.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_thinking_part_iter.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_top_p.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_vertex_labels.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_vertex_provider.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_video_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_video_as_binary_content_input_with_vendor_metadata.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_video_url_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_model_youtube_video_url_input_with_vendor_metadata.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_native_output.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_native_output_multiple.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_prompted_output.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_prompted_output_multiple.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_prompted_output_with_tools.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_text_output_function.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_timeout.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_tool_config_any_with_tool_without_args.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_tool_output.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_url_input[AudioUrl (gs)].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_url_input[AudioUrl].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_url_input[DocumentUrl (gs)].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_url_input[DocumentUrl].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_url_input[ImageUrl (gs)].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_url_input[ImageUrl].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_url_input[VideoUrl (YouTube)].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_url_input[VideoUrl (gs)].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_url_input[VideoUrl].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_google/test_google_url_input_force_download.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_groq/test_extra_headers.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_groq/test_groq_model_instructions.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_groq/test_groq_model_thinking_part.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_groq/test_groq_model_thinking_part_iter.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_groq/test_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_groq/test_image_as_binary_content_tool_response.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_groq/test_image_url_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_huggingface/test_hf_model_instructions.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_huggingface/test_hf_model_thinking_part.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_huggingface/test_hf_model_thinking_part_iter.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_huggingface/test_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_huggingface/test_image_url_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_huggingface/test_max_completion_tokens[Qwen-Qwen2.5-72B-Instruct].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_huggingface/test_max_completion_tokens[deepseek-ai-DeepSeek-R1-0528].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_huggingface/test_max_completion_tokens[meta-llama-Llama-3.3-70B-Instruct].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_huggingface/test_request_simple_success_with_vcr.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_huggingface/test_request_simple_usage.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_huggingface/test_simple_completion.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_huggingface/test_stream_completion.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_mistral/test_image_as_binary_content_tool_response.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_mistral/test_mistral_model_instructions.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_mistral/test_mistral_model_thinking_part.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_model_names/test_known_model_names.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_audio_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_compatible_api_with_tool_calls_without_id.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_document_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_document_url_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_extra_headers.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_image_as_binary_content_tool_response.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_image_url_tool_response.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_invalid_response.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_max_completion_tokens[gpt-4.5-preview].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_max_completion_tokens[gpt-4o-mini].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_max_completion_tokens[o3-mini].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_multiple_agent_tool_calls.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_audio_url_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_instructions.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_instructions_with_tool_calls_keep_instructions.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_model_thinking_part.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_model_thinking_part_iter.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_model_without_system_prompt.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_native_output.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_native_output_multiple.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_o1_mini_system_role[developer].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_o1_mini_system_role[system].yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_prompted_output.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_prompted_output_multiple.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_responses_model_thinking_part.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_text_output_function.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_openai_tool_output.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_reasoning_model_with_temperature.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_text_response.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_user_id.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai/test_valid_response.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_audio_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_image_as_binary_content_tool_response.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_native_output.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_native_output_multiple.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_document_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_document_url_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_image_url_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_builtin_tools.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_http_error.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_instructions.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_retry.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_simple_response.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_simple_response_with_tool_call.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_output_type.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_reasoning_effort.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_reasoning_generate_summary.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_stream.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_system_prompt.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_text_document_url_input.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_prompted_output.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_prompted_output_multiple.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_reasoning_model_with_temperature.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_text_output_function.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/cassettes/test_openai_responses/test_tool_output.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/mock_async_stream.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_anthropic.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_bedrock.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_cohere.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_deepseek.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_download_item.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_fallback.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_gemini.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_gemini_vertex.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_google.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_groq.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_huggingface.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_instrumented.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_mcp_sampling.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_mistral.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_model.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_model_function.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_model_names.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_model_request_parameters.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_model_settings.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_model_test.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/models/test_openai_responses.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/__init__.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/cassettes/test_azure/test_azure_provider_call.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/cassettes/test_google_vertex/test_vertexai_provider.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/cassettes/test_heroku/test_heroku_model_provider_claude_3_7_sonnet.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/cassettes/test_openrouter/test_openrouter_with_google_model.yaml +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_anthropic.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_azure.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_bedrock.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_cohere.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_deepseek.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_fireworks.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_github.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_google_gla.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_google_vertex.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_grok.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_groq.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_heroku.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_huggingface.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_mistral.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_moonshotai.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_openai.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_openrouter.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_provider_names.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_together.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/providers/test_vercel.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_a2a.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_ag_ui.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_cli.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_deps.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_direct.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_examples.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_format_as_xml.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_history_processor.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_json_body_serializer.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_live.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_logfire.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_mcp.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_parts_manager.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_settings.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_streaming.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_tenacity.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_thinking_part.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_toolsets.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_usage_limits.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/test_utils.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/typed_agent.py +0 -0
- {pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/typed_graph.py +0 -0
{pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pydantic-ai
-Version: 0.4.11
+Version: 0.5.0
 Summary: Agent Framework / shim to use Pydantic with LLMs
 Project-URL: Homepage, https://ai.pydantic.dev
 Project-URL: Source, https://github.com/pydantic/pydantic-ai
@@ -28,11 +28,11 @@ Classifier: Topic :: Internet
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.9
-Requires-Dist: pydantic-ai-slim[ag-ui,anthropic,bedrock,cli,cohere,evals,google,groq,huggingface,mcp,mistral,openai,retries,vertexai]==0.4.11
+Requires-Dist: pydantic-ai-slim[ag-ui,anthropic,bedrock,cli,cohere,evals,google,groq,huggingface,mcp,mistral,openai,retries,vertexai]==0.5.0
 Provides-Extra: a2a
 Requires-Dist: fasta2a>=0.4.1; extra == 'a2a'
 Provides-Extra: examples
-Requires-Dist: pydantic-ai-examples==0.4.11; extra == 'examples'
+Requires-Dist: pydantic-ai-examples==0.5.0; extra == 'examples'
 Provides-Extra: logfire
 Requires-Dist: logfire>=3.11.0; extra == 'logfire'
 Description-Content-Type: text/markdown

{pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_dataset.py

@@ -9,7 +9,7 @@ from typing import Any
 import pytest
 from dirty_equals import HasRepr, IsNumber
 from inline_snapshot import snapshot
-from pydantic import BaseModel
+from pydantic import BaseModel, TypeAdapter
 
 from ..conftest import IsStr, try_import
 from .utils import render_table
@@ -20,7 +20,7 @@ with try_import() as imports_successful:
 
     from pydantic_evals import Case, Dataset
     from pydantic_evals.dataset import increment_eval_metric, set_eval_attribute
-    from pydantic_evals.evaluators import EvaluationResult, Evaluator, EvaluatorOutput, LLMJudge, Python
+    from pydantic_evals.evaluators import EvaluationResult, Evaluator, EvaluatorOutput, EvaluatorSpec, LLMJudge, Python
     from pydantic_evals.evaluators.context import EvaluatorContext
 
     @dataclass
@@ -32,7 +32,7 @@ with try_import() as imports_successful:
        def evaluate(self, ctx: EvaluatorContext[object, object, object]) -> EvaluatorOutput:
            return self.output
 
-    from pydantic_evals.reporting import ReportCase, ReportCaseAdapter
+    from pydantic_evals.reporting import EvaluationReport, ReportCase, ReportCaseAdapter
 
 pytestmark = [pytest.mark.skipif(not imports_successful(), reason='pydantic-evals not installed'), pytest.mark.anyio]
 
@@ -456,13 +456,13 @@ async def test_repeated_name_outputs(example_dataset: Dataset[TaskInput, TaskOut
             scores={},
             labels={
                 'output': EvaluationResult(
-                    name='output', value='a', reason=None, source=MockEvaluator(output={'output': 'a'})
+                    name='output', value='a', reason=None, source=MockEvaluator(output={'output': 'a'}).as_spec()
                 ),
                 'output_2': EvaluationResult(
-                    name='output', value='b', reason=None, source=MockEvaluator(output={'output': 'b'})
+                    name='output', value='b', reason=None, source=MockEvaluator(output={'output': 'b'}).as_spec()
                 ),
                 'output_3': EvaluationResult(
-                    name='output', value='c', reason=None, source=MockEvaluator(output={'output': 'c'})
+                    name='output', value='c', reason=None, source=MockEvaluator(output={'output': 'c'}).as_spec()
                 ),
             },
             assertions={},
@@ -482,13 +482,13 @@ async def test_repeated_name_outputs(example_dataset: Dataset[TaskInput, TaskOut
             scores={},
             labels={
                 'output': EvaluationResult(
-                    name='output', value='a', reason=None, source=MockEvaluator(output={'output': 'a'})
+                    name='output', value='a', reason=None, source=MockEvaluator(output={'output': 'a'}).as_spec()
                 ),
                 'output_2': EvaluationResult(
-                    name='output', value='b', reason=None, source=MockEvaluator(output={'output': 'b'})
+                    name='output', value='b', reason=None, source=MockEvaluator(output={'output': 'b'}).as_spec()
                 ),
                 'output_3': EvaluationResult(
-                    name='output', value='c', reason=None, source=MockEvaluator(output={'output': 'c'})
+                    name='output', value='c', reason=None, source=MockEvaluator(output={'output': 'c'}).as_spec()
                 ),
             },
             assertions={},
@@ -501,6 +501,73 @@ async def test_repeated_name_outputs(example_dataset: Dataset[TaskInput, TaskOut
     )
 
 
+async def test_report_round_trip_serialization(example_dataset: Dataset[TaskInput, TaskOutput, TaskMetadata]):
+    """Test the increment_eval_metric function."""
+
+    async def my_task(inputs: TaskInput) -> TaskOutput:
+        return TaskOutput(answer=f'answer to {inputs.query}')
+
+    example_dataset.add_evaluator(MockEvaluator({'output': 'a'}))
+
+    report = await example_dataset.evaluate(my_task)
+    assert report == snapshot(
+        EvaluationReport(
+            name='my_task',
+            cases=[
+                ReportCase(
+                    name='case1',
+                    inputs=TaskInput(query='What is 2+2?'),
+                    metadata=TaskMetadata(difficulty='easy', category='general'),
+                    expected_output=TaskOutput(answer='4', confidence=1.0),
+                    output=TaskOutput(answer='answer to What is 2+2?', confidence=1.0),
+                    metrics={},
+                    attributes={},
+                    scores={},
+                    labels={
+                        'output': EvaluationResult(
+                            name='output',
+                            value='a',
+                            reason=None,
+                            source=EvaluatorSpec(name='MockEvaluator', arguments=({'output': 'a'},)),
+                        )
+                    },
+                    assertions={},
+                    task_duration=1.0,
+                    total_duration=6.0,
+                    trace_id='00000000000000000000000000000001',
+                    span_id='0000000000000003',
+                ),
+                ReportCase(
+                    name='case2',
+                    inputs=TaskInput(query='What is the capital of France?'),
+                    metadata=TaskMetadata(difficulty='medium', category='geography'),
+                    expected_output=TaskOutput(answer='Paris', confidence=1.0),
+                    output=TaskOutput(answer='answer to What is the capital of France?', confidence=1.0),
+                    metrics={},
+                    attributes={},
+                    scores={},
+                    labels={
+                        'output': EvaluationResult(
+                            name='output',
+                            value='a',
+                            reason=None,
+                            source=EvaluatorSpec(name='MockEvaluator', arguments=({'output': 'a'},)),
+                        )
+                    },
+                    assertions={},
+                    task_duration=1.0,
+                    total_duration=4.0,
+                    trace_id='00000000000000000000000000000001',
+                    span_id='0000000000000007',
+                ),
+            ],
+        )
+    )
+
+    report_adapter = TypeAdapter(EvaluationReport[TaskInput, TaskOutput, TaskMetadata])
+    assert report == report_adapter.validate_json(report_adapter.dump_json(report, indent=2))
+
+
 async def test_genai_attribute_collection(example_dataset: Dataset[TaskInput, TaskOutput, TaskMetadata]):
     async def my_task(inputs: TaskInput) -> TaskOutput:
         with logfire.span(
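The new test_report_round_trip_serialization test above shows that an EvaluationReport produced by Dataset.evaluate can now be serialized to JSON and validated back with a plain pydantic TypeAdapter. A minimal sketch of that round trip, assuming the same TaskInput/TaskOutput/TaskMetadata models and an existing report object as in the test fixtures:

from pydantic import TypeAdapter
from pydantic_evals.reporting import EvaluationReport

# TaskInput, TaskOutput, TaskMetadata and `report` are assumed to exist,
# mirroring the dataset fixtures used in the test above.
adapter = TypeAdapter(EvaluationReport[TaskInput, TaskOutput, TaskMetadata])
as_json = adapter.dump_json(report, indent=2)  # EvaluationReport -> JSON bytes
restored = adapter.validate_json(as_json)      # JSON bytes -> EvaluationReport
assert restored == report                      # lossless round trip

This only works because EvaluationResult.source now stores a serializable EvaluatorSpec (see the .as_spec() changes above) instead of the evaluator instance itself.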
{pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_evaluator_base.py

@@ -52,11 +52,11 @@ def test_evaluation_result():
     evaluator = DummyEvaluator()
 
     # Test basic result
-    result = EvaluationResult(name='test', value=True, reason='Success', source=evaluator)
+    result = EvaluationResult(name='test', value=True, reason='Success', source=evaluator.as_spec())
     assert result.name == 'test'
     assert result.value is True
     assert result.reason == 'Success'
-    assert result.source == evaluator
+    assert result.source == evaluator.as_spec()
 
     # Test downcast with matching type
     downcast = result.downcast(bool)
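The recurring change across these test files is that EvaluationResult.source now holds the serializable spec returned by Evaluator.as_spec() rather than the evaluator instance itself. A minimal sketch of the pattern, using a hypothetical ExactMatch evaluator written the same way as the DummyEvaluator/MockEvaluator test helpers:

from dataclasses import dataclass

from pydantic_evals.evaluators import EvaluationResult, Evaluator, EvaluatorOutput
from pydantic_evals.evaluators.context import EvaluatorContext


@dataclass
class ExactMatch(Evaluator[object, object, object]):  # hypothetical example evaluator
    def evaluate(self, ctx: EvaluatorContext[object, object, object]) -> EvaluatorOutput:
        return ctx.output == ctx.expected_output


evaluator = ExactMatch()
spec = evaluator.as_spec()  # an EvaluatorSpec, e.g. EvaluatorSpec(name='ExactMatch', arguments=None)
result = EvaluationResult(name='exact_match', value=True, reason=None, source=spec)
assert result.source == evaluator.as_spec()  # the spec, not the evaluator instance, is stored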
{pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_evaluator_spec.py

@@ -6,7 +6,7 @@ from pydantic import ValidationError
 from ..conftest import try_import
 
 with try_import() as imports_successful:
-    from pydantic_evals.evaluators._spec import (
+    from pydantic_evals.evaluators.spec import (
        EvaluatorSpec,
        _SerializedEvaluatorSpec,  # pyright: ignore[reportPrivateUsage]
    )
{pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_evaluators.py

@@ -19,7 +19,6 @@ with try_import() as imports_successful:
     from logfire.testing import CaptureLogfire
 
     from pydantic_evals.evaluators._run_evaluator import run_evaluator
-    from pydantic_evals.evaluators._spec import EvaluatorSpec
     from pydantic_evals.evaluators.common import (
        Contains,
        Equals,
@@ -36,6 +35,7 @@ with try_import() as imports_successful:
        Evaluator,
        EvaluatorOutput,
    )
+    from pydantic_evals.evaluators.spec import EvaluatorSpec
     from pydantic_evals.otel._context_in_memory_span_exporter import context_subtree
     from pydantic_evals.otel.span_tree import SpanQuery, SpanTree
 
@@ -162,7 +162,7 @@ async def test_evaluator_call(test_context: EvaluatorContext[TaskInput, TaskOutp
     assert results[0].name == 'result'
     assert results[0].value == 'passed'
     assert results[0].reason is None
-    assert results[0].source
+    assert results[0].source == EvaluatorSpec(name='ExampleEvaluator', arguments=None)
 
 
 async def test_is_instance_evaluator():
@@ -242,7 +242,14 @@ async def test_custom_evaluator_name(test_context: EvaluatorContext[TaskInput, T
     evaluator = CustomNameFieldEvaluator(result=123, evaluation_name='abc')
 
     assert to_jsonable_python(await run_evaluator(evaluator, test_context)) == snapshot(
-        [
+        [
+            {
+                'name': 'abc',
+                'reason': None,
+                'source': {'arguments': {'evaluation_name': 'abc', 'result': 123}, 'name': 'CustomNameFieldEvaluator'},
+                'value': 123,
+            }
+        ]
     )
 
     @dataclass
@@ -260,7 +267,14 @@ async def test_custom_evaluator_name(test_context: EvaluatorContext[TaskInput, T
     evaluator = CustomNamePropertyEvaluator(result=123, my_name='marcelo')
 
     assert to_jsonable_python(await run_evaluator(evaluator, test_context)) == snapshot(
-        [
+        [
+            {
+                'name': 'hello marcelo',
+                'reason': None,
+                'source': {'arguments': {'my_name': 'marcelo', 'result': 123}, 'name': 'CustomNamePropertyEvaluator'},
+                'value': 123,
+            }
+        ]
     )
 
 
{pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_reporting.py

@@ -48,7 +48,7 @@ def sample_assertion(mock_evaluator: Evaluator[TaskInput, TaskOutput, TaskMetada
        name='MockEvaluator',
        value=True,
        reason=None,
-        source=mock_evaluator,
+        source=mock_evaluator.as_spec(),
    )
 
 
@@ -57,8 +57,8 @@ def sample_score(mock_evaluator: Evaluator[TaskInput, TaskOutput, TaskMetadata])
    return EvaluationResult(
        name='MockEvaluator',
        value=2.5,
-        reason=None,
-        source=mock_evaluator,
+        reason='my reason',
+        source=mock_evaluator.as_spec(),
    )
 
 
@@ -68,7 +68,7 @@ def sample_label(mock_evaluator: Evaluator[TaskInput, TaskOutput, TaskMetadata])
        name='MockEvaluator',
        value='hello',
        reason=None,
-        source=mock_evaluator,
+        source=mock_evaluator.as_spec(),
    )
 
 
@@ -120,6 +120,7 @@ async def test_evaluation_renderer_basic(sample_report: EvaluationReport):
        label_configs={},
        metric_configs={},
        duration_config={},
+        include_reasons=False,
    )
 
    table = renderer.build_table(sample_report)
@@ -137,6 +138,43 @@ async def test_evaluation_renderer_basic(sample_report: EvaluationReport):
 """)
 
 
+async def test_evaluation_renderer_with_reasons(sample_report: EvaluationReport):
+    """Test basic functionality of EvaluationRenderer."""
+    renderer = EvaluationRenderer(
+        include_input=True,
+        include_output=True,
+        include_metadata=True,
+        include_expected_output=True,
+        include_durations=True,
+        include_total_duration=True,
+        include_removed_cases=False,
+        include_averages=True,
+        input_config={},
+        metadata_config={},
+        output_config={},
+        score_configs={},
+        label_configs={},
+        metric_configs={},
+        duration_config={},
+        include_reasons=True,
+    )
+
+    table = renderer.build_table(sample_report)
+    assert render_table(table) == snapshot("""\
+Evaluation Summary: test_report
+┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓
+┃ Case ID ┃ Inputs ┃ Metadata ┃ Expected Output ┃ Outputs ┃ Scores ┃ Labels ┃ Metrics ┃ Assertions ┃ Durations ┃
+┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━┩
+│ test_case │ {'query': 'What is 2+2?'} │ {'difficulty': 'easy'} │ {'answer': '4'} │ {'answer': '4'} │ score1: 2.50 │ label1: hello │ accuracy: 0.950 │ MockEvaluator: ✔ │ task: 0.100 │
+│ │ │ │ │ │ Reason: my reason │ │ │ │ total: 0.200 │
+│ │ │ │ │ │ │ │ │ │ │
+├───────────┼───────────────────────────┼────────────────────────┼─────────────────┼─────────────────┼─────────────────────┼────────────────────────┼─────────────────┼──────────────────┼──────────────┤
+│ Averages │ │ │ │ │ score1: 2.50 │ label1: {'hello': 1.0} │ accuracy: 0.950 │ 100.0% ✔ │ task: 0.100 │
+│ │ │ │ │ │ │ │ │ │ total: 0.200 │
+└───────────┴───────────────────────────┴────────────────────────┴─────────────────┴─────────────────┴─────────────────────┴────────────────────────┴─────────────────┴──────────────────┴──────────────┘
+""")
+
+
 async def test_evaluation_renderer_with_baseline(sample_report: EvaluationReport):
    """Test EvaluationRenderer with baseline comparison."""
    baseline_report = EvaluationReport(
@@ -191,20 +229,21 @@ async def test_evaluation_renderer_with_baseline(sample_report: EvaluationReport
        label_configs={},
        metric_configs={},
        duration_config={},
+        include_reasons=False,
    )
 
    table = renderer.build_diff_table(sample_report, baseline_report)
    assert render_table(table) == snapshot("""\
-
-
-┃ Case ID ┃ Inputs ┃ Metadata ┃ Expected Output ┃ Outputs ┃ Scores ┃ Labels
-
-│ test_case │ {'query': 'What is 2+2?'} │ {'difficulty': 'easy'} │ {'answer': '4'} │ {'answer': '4'} │ score1: 2.50 │ label1:
-│ │ │ │ │ │ │
-
-│ Averages │ │ │ │ │ score1: 2.50 │ label1: {'hello': 1.0}
-│ │ │ │ │ │ │
-
+Evaluation Diff: baseline_report → test_report
+┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
+┃ Case ID ┃ Inputs ┃ Metadata ┃ Expected Output ┃ Outputs ┃ Scores ┃ Labels ┃ Metrics ┃ Assertions ┃ Durations ┃
+┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
+│ test_case │ {'query': 'What is 2+2?'} │ {'difficulty': 'easy'} │ {'answer': '4'} │ {'answer': '4'} │ score1: 2.50 │ label1: hello │ accuracy: 0.900 → 0.950 (+0.05 / +5.6%) │ → ✔ │ task: 0.150 → 0.100 (-0.05 / -33.3%) │
+│ │ │ │ │ │ │ │ │ │ total: 0.250 → 0.200 (-0.05 / -20.0%) │
+├───────────┼───────────────────────────┼────────────────────────┼─────────────────┼─────────────────┼──────────────┼────────────────────────┼─────────────────────────────────────────┼──────────────┼───────────────────────────────────────┤
+│ Averages │ │ │ │ │ score1: 2.50 │ label1: {'hello': 1.0} │ accuracy: 0.900 → 0.950 (+0.05 / +5.6%) │ - → 100.0% ✔ │ task: 0.150 → 0.100 (-0.05 / -33.3%) │
+│ │ │ │ │ │ │ │ │ │ total: 0.250 → 0.200 (-0.05 / -20.0%) │
+└───────────┴───────────────────────────┴────────────────────────┴─────────────────┴─────────────────┴──────────────┴────────────────────────┴─────────────────────────────────────────┴──────────────┴───────────────────────────────────────┘
 """)
 
 
@@ -248,6 +287,7 @@ async def test_evaluation_renderer_with_removed_cases(sample_report: EvaluationR
        label_configs={},
        metric_configs={},
        duration_config={},
+        include_reasons=False,
    )
 
    table = renderer.build_diff_table(sample_report, baseline_report)
@@ -311,6 +351,7 @@ async def test_evaluation_renderer_with_custom_configs(sample_report: Evaluation
            'diff_increase_style': 'bold red',
            'diff_decrease_style': 'bold green',
        },
+        include_reasons=False,
    )
 
    table = renderer.build_table(sample_report)
@@ -350,7 +391,7 @@ async def test_report_case_aggregate_average():
                name='MockEvaluator',
                value=0.8,
                reason=None,
-                source=MockEvaluator(),
+                source=MockEvaluator().as_spec(),
            )
        },
        labels={
@@ -358,7 +399,7 @@ async def test_report_case_aggregate_average():
                name='MockEvaluator',
                value='good',
                reason=None,
-                source=MockEvaluator(),
+                source=MockEvaluator().as_spec(),
            )
        },
        assertions={
@@ -366,7 +407,7 @@ async def test_report_case_aggregate_average():
                name='MockEvaluator',
                value=True,
                reason=None,
-                source=MockEvaluator(),
+                source=MockEvaluator().as_spec(),
            )
        },
        task_duration=0.1,
@@ -387,7 +428,7 @@ async def test_report_case_aggregate_average():
                name='MockEvaluator',
                value=0.7,
                reason=None,
-                source=MockEvaluator(),
+                source=MockEvaluator().as_spec(),
            )
        },
        labels={
@@ -395,7 +436,7 @@ async def test_report_case_aggregate_average():
                name='MockEvaluator',
                value='good',
                reason=None,
-                source=MockEvaluator(),
+                source=MockEvaluator().as_spec(),
            )
        },
        assertions={
@@ -403,7 +444,7 @@ async def test_report_case_aggregate_average():
                name='MockEvaluator',
                value=False,
                reason=None,
-                source=MockEvaluator(),
+                source=MockEvaluator().as_spec(),
            )
        },
        task_duration=0.15,
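The reporting changes above add an include_reasons flag to EvaluationRenderer; when enabled, the rendered table shows an extra "Reason: …" line under the corresponding score/label/assertion cell, as in the test_evaluation_renderer_with_reasons snapshot. A minimal sketch of flipping the flag, assuming the import location and a sample_report fixture like the one used in these tests:

from pydantic_evals.reporting import EvaluationRenderer  # assumed import location

# All other arguments mirror the test above; include_reasons is the new option.
renderer = EvaluationRenderer(
    include_input=True,
    include_output=True,
    include_metadata=True,
    include_expected_output=True,
    include_durations=True,
    include_total_duration=True,
    include_removed_cases=False,
    include_averages=True,
    input_config={},
    metadata_config={},
    output_config={},
    score_configs={},
    label_configs={},
    metric_configs={},
    duration_config={},
    include_reasons=True,  # new in 0.5.0: render evaluator reasons alongside their values
)
table = renderer.build_table(sample_report)  # `sample_report` is an EvaluationReport fixture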
{pydantic_ai-0.4.11 → pydantic_ai-0.5.0}/tests/evals/test_reports.py

@@ -57,7 +57,7 @@ def sample_evaluation_result(
        name='MockEvaluator',
        value=True,
        reason=None,
-        source=mock_evaluator,
+        source=mock_evaluator.as_spec(),
    )
 
 
@@ -177,7 +177,7 @@ async def test_report_with_error(mock_evaluator: Evaluator[TaskInput, TaskOutput
        name='error_evaluator',
        value=False,  # No result
        reason='Test error message',
-        source=mock_evaluator,
+        source=mock_evaluator.as_spec(),
    )
 
    # Create a case