pydantic-ai 0.2.11__tar.gz → 0.2.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pydantic-ai might be problematic.
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/PKG-INFO +3 -3
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_evaluator_common.py +82 -2
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_llm_as_a_judge.py +113 -0
- pydantic_ai-0.2.13/tests/models/cassettes/test_openai/test_compatible_api_with_tool_calls_without_id.yaml +159 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_bedrock.py +95 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_gemini.py +81 -11
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_google.py +60 -8
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_openai.py +147 -16
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_openai_responses.py +43 -0
- pydantic_ai-0.2.13/tests/providers/test_azure.py +141 -0
- pydantic_ai-0.2.13/tests/providers/test_bedrock.py +95 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/providers/test_deepseek.py +8 -1
- pydantic_ai-0.2.13/tests/providers/test_fireworks.py +104 -0
- pydantic_ai-0.2.13/tests/providers/test_grok.py +57 -0
- pydantic_ai-0.2.13/tests/providers/test_groq.py +107 -0
- pydantic_ai-0.2.13/tests/providers/test_openrouter.py +154 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/providers/test_provider_names.py +6 -0
- pydantic_ai-0.2.13/tests/providers/test_together.py +100 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_agent.py +645 -3
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_examples.py +94 -1
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_tools.py +1 -1
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/typed_agent.py +50 -22
- pydantic_ai-0.2.11/tests/providers/test_azure.py +0 -72
- pydantic_ai-0.2.11/tests/providers/test_bedrock.py +0 -34
- pydantic_ai-0.2.11/tests/providers/test_groq.py +0 -57
- pydantic_ai-0.2.11/tests/providers/test_openrouter.py +0 -67
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/.gitignore +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/LICENSE +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/Makefile +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/README.md +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/pyproject.toml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/__init__.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/assets/dummy.pdf +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/assets/kiwi.png +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/assets/marcelo.mp3 +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/assets/small_video.mp4 +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_mcp/test_agent_with_stdio_server.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_mcp/test_tool_returning_dict.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_mcp/test_tool_returning_error.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_mcp/test_tool_returning_image.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_mcp/test_tool_returning_image_resource.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_mcp/test_tool_returning_multiple_items.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_mcp/test_tool_returning_none.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_mcp/test_tool_returning_str.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_mcp/test_tool_returning_text_resource.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_settings/test_stop_settings[anthropic].yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_settings/test_stop_settings[bedrock].yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_settings/test_stop_settings[cohere].yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_settings/test_stop_settings[gemini].yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_settings/test_stop_settings[groq].yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_settings/test_stop_settings[mistral].yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/cassettes/test_settings/test_stop_settings[openai].yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/conftest.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/__init__.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_dataset.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_evaluator_base.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_evaluator_context.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_evaluator_spec.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_evaluators.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_otel.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_render_numbers.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_reporting.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_reports.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_utils.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/utils.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/example_modules/README.md +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/example_modules/bank_database.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/example_modules/fake_database.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/example_modules/weather_service.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/fasta2a/__init__.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/fasta2a/test_applications.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/graph/__init__.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/graph/test_file_persistence.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/graph/test_graph.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/graph/test_mermaid.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/graph/test_persistence.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/graph/test_state.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/graph/test_utils.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/import_examples.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/json_body_serializer.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/mcp_server.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/__init__.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_anthropic/test_anthropic_model_empty_message_on_history.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_anthropic/test_anthropic_model_instructions.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_anthropic/test_document_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_anthropic/test_document_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_anthropic/test_extra_headers.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_anthropic/test_image_as_binary_content_tool_response.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_anthropic/test_image_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_anthropic/test_image_url_input_invalid_mime_type.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_anthropic/test_multiple_parallel_tool_calls.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_anthropic/test_text_document_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_empty_system_prompt.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_model.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_model_anthropic_model_without_tools.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_model_guardrail_config.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_model_instructions.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_model_iter_stream.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_model_max_tokens.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_model_other_parameters.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_model_performance_config.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_model_retry.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_model_stream.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_model_structured_response.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_model_top_p.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_bedrock_multiple_documents_in_history.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_document_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_image_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_text_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_text_document_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_video_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_bedrock/test_video_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_cohere/test_cohere_model_instructions.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_cohere/test_request_simple_success_with_vcr.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_gemini/test_document_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_gemini/test_gemini_additional_properties_is_false.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_gemini/test_gemini_additional_properties_is_true.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_gemini/test_gemini_drop_exclusive_maximum.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_gemini/test_gemini_exclusive_minimum_and_maximum.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_gemini/test_gemini_model_instructions.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_gemini/test_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_gemini/test_image_as_binary_content_tool_response.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_gemini/test_image_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_gemini/test_labels_are_ignored_with_gla_provider.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_gemini/test_video_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_gemini/test_video_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_gemini_vertexai/test_labels.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_document_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_image_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_instructions.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_iter_stream.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_max_tokens.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_multiple_documents_in_history.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_retry.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_safety_settings.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_stream.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_structured_response.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_text_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_text_document_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_thinking_config.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_top_p.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_vertex_labels.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_vertex_provider.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_video_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_google/test_google_model_video_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_groq/test_extra_headers.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_groq/test_groq_model_instructions.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_groq/test_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_groq/test_image_as_binary_content_tool_response.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_groq/test_image_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_mistral/test_image_as_binary_content_tool_response.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_mistral/test_mistral_model_instructions.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_audio_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_document_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_document_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_extra_headers.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_image_as_binary_content_tool_response.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_image_url_tool_response.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_max_completion_tokens[gpt-4.5-preview].yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_max_completion_tokens[gpt-4o-mini].yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_max_completion_tokens[o3-mini].yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_multiple_agent_tool_calls.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_openai_audio_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_openai_instructions.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_openai_instructions_with_tool_calls_keep_instructions.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_openai_model_without_system_prompt.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_openai_o1_mini_system_role[developer].yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_openai_o1_mini_system_role[system].yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai/test_user_id.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_audio_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_image_as_binary_content_tool_response.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_document_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_document_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_image_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_builtin_tools.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_http_error.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_instructions.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_retry.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_simple_response.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_simple_response_with_tool_call.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_output_type.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_reasoning_effort.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_reasoning_generate_summary.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_stream.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_system_prompt.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/cassettes/test_openai_responses/test_openai_responses_text_document_url_input.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/mock_async_stream.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_anthropic.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_cohere.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_fallback.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_gemini_vertexai.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_groq.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_instrumented.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_mistral.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_model.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_model_function.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_model_names.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_model_request_parameters.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_model_test.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/providers/__init__.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/providers/cassettes/test_azure/test_azure_provider_call.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/providers/cassettes/test_google_vertex/test_vertexai_provider.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/providers/cassettes/test_openrouter/test_openrouter_with_google_model.yaml +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/providers/test_anthropic.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/providers/test_cohere.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/providers/test_google_gla.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/providers/test_google_vertex.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/providers/test_mistral.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/providers/test_openai.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_a2a.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_cli.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_deps.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_direct.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_format_as_xml.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_json_body_serializer.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_live.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_logfire.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_mcp.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_messages.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_parts_manager.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_settings.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_streaming.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_usage_limits.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/test_utils.py +0 -0
- {pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/typed_graph.py +0 -0
{pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pydantic-ai
-Version: 0.2.11
+Version: 0.2.13
 Summary: Agent Framework / shim to use Pydantic with LLMs
 Project-URL: Homepage, https://ai.pydantic.dev
 Project-URL: Source, https://github.com/pydantic/pydantic-ai
@@ -28,9 +28,9 @@ Classifier: Topic :: Internet
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.9
-Requires-Dist: pydantic-ai-slim[a2a,anthropic,bedrock,cli,cohere,evals,google,groq,mcp,mistral,openai,vertexai]==0.2.11
+Requires-Dist: pydantic-ai-slim[a2a,anthropic,bedrock,cli,cohere,evals,google,groq,mcp,mistral,openai,vertexai]==0.2.13
 Provides-Extra: examples
-Requires-Dist: pydantic-ai-examples==0.2.11; extra == 'examples'
+Requires-Dist: pydantic-ai-examples==0.2.13; extra == 'examples'
 Provides-Extra: logfire
 Requires-Dist: logfire>=3.11.0; extra == 'logfire'
 Description-Content-Type: text/markdown
```
{pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_evaluator_common.py

```diff
@@ -208,11 +208,21 @@ async def test_llm_judge_evaluator(mocker: MockerFixture):
     mock_judge_input_output = mocker.patch('pydantic_evals.evaluators.llm_as_a_judge.judge_input_output')
     mock_judge_input_output.return_value = mock_grading_output
 
+    # Mock the judge_input_output_expected function
+    mock_judge_input_output_expected = mocker.patch(
+        'pydantic_evals.evaluators.llm_as_a_judge.judge_input_output_expected'
+    )
+    mock_judge_input_output_expected.return_value = mock_grading_output
+
+    # Mock the judge_output_expected function
+    mock_judge_output_expected = mocker.patch('pydantic_evals.evaluators.llm_as_a_judge.judge_output_expected')
+    mock_judge_output_expected.return_value = mock_grading_output
+
     ctx = EvaluatorContext(
         name='test',
         inputs={'prompt': 'Hello'},
         metadata=None,
-        expected_output=None,
+        expected_output='Hello',
         output='Hello world',
         duration=0.0,
         _span_tree=SpanTreeRecordingError('spans were not recorded'),
@@ -238,6 +248,29 @@ async def test_llm_judge_evaluator(mocker: MockerFixture):
         {'prompt': 'Hello'}, 'Hello world', 'Output contains input', 'openai:gpt-4o', None
     )
 
+    # Test with input and expected output
+    evaluator = LLMJudge(
+        rubric='Output contains input', include_input=True, include_expected_output=True, model='openai:gpt-4o'
+    )
+    assert to_jsonable_python(await evaluator.evaluate(ctx)) == snapshot(
+        {'LLMJudge': {'value': True, 'reason': 'Test passed'}}
+    )
+
+    mock_judge_input_output_expected.assert_called_once_with(
+        {'prompt': 'Hello'}, 'Hello world', 'Hello', 'Output contains input', 'openai:gpt-4o', None
+    )
+
+    # Test with output and expected output
+    evaluator = LLMJudge(
+        rubric='Output contains input', include_input=False, include_expected_output=True, model='openai:gpt-4o'
+    )
+    assert to_jsonable_python(await evaluator.evaluate(ctx)) == snapshot(
+        {'LLMJudge': {'value': True, 'reason': 'Test passed'}}
+    )
+
+    mock_judge_output_expected.assert_called_once_with(
+        'Hello world', 'Hello', 'Output contains input', 'openai:gpt-4o', None
+    )
     # Test with failing result
     mock_grading_output.score = 0.0
     mock_grading_output.pass_ = False
@@ -273,13 +306,21 @@ async def test_llm_judge_evaluator_with_model_settings(mocker: MockerFixture):
     mock_judge_input_output = mocker.patch('pydantic_evals.evaluators.llm_as_a_judge.judge_input_output')
     mock_judge_input_output.return_value = mock_grading_output
 
+    mock_judge_input_output_expected = mocker.patch(
+        'pydantic_evals.evaluators.llm_as_a_judge.judge_input_output_expected'
+    )
+    mock_judge_input_output_expected.return_value = mock_grading_output
+
+    mock_judge_output_expected = mocker.patch('pydantic_evals.evaluators.llm_as_a_judge.judge_output_expected')
+    mock_judge_output_expected.return_value = mock_grading_output
+
     custom_model_settings = ModelSettings(temperature=0.77)
 
     ctx = EvaluatorContext(
         name='test_custom_settings',
         inputs={'prompt': 'Hello Custom'},
         metadata=None,
-        expected_output=None,
+        expected_output='Hello',
         output='Hello world custom settings',
         duration=0.0,
         _span_tree=SpanTreeRecordingError('spans were not recorded'),
@@ -314,6 +355,45 @@ async def test_llm_judge_evaluator_with_model_settings(mocker: MockerFixture):
         custom_model_settings,
     )
 
+    # Test with input and expected output, with custom model_settings
+    evaluator_with_input_expected = LLMJudge(
+        rubric='Output contains input with custom settings',
+        include_input=True,
+        include_expected_output=True,
+        model='openai:gpt-3.5-turbo',
+        model_settings=custom_model_settings,
+    )
+    assert to_jsonable_python(await evaluator_with_input_expected.evaluate(ctx)) == snapshot(
+        {'LLMJudge': {'value': True, 'reason': 'Test passed with settings'}}
+    )
+    mock_judge_input_output_expected.assert_called_once_with(
+        {'prompt': 'Hello Custom'},
+        'Hello world custom settings',
+        'Hello',
+        'Output contains input with custom settings',
+        'openai:gpt-3.5-turbo',
+        custom_model_settings,
+    )
+
+    # Test with output and expected output
+    evaluator_with_output_expected = LLMJudge(
+        rubric='Output contains input with custom settings',
+        include_input=False,
+        include_expected_output=True,
+        model='openai:gpt-3.5-turbo',
+        model_settings=custom_model_settings,
+    )
+    assert to_jsonable_python(await evaluator_with_output_expected.evaluate(ctx)) == snapshot(
+        {'LLMJudge': {'value': True, 'reason': 'Test passed with settings'}}
+    )
+    mock_judge_output_expected.assert_called_once_with(
+        'Hello world custom settings',
+        'Hello',
+        'Output contains input with custom settings',
+        'openai:gpt-3.5-turbo',
+        custom_model_settings,
+    )
+
 
 async def test_python():
     """Test Python evaluator."""
```
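The tests above exercise the new `include_expected_output` option on `LLMJudge`, which passes a case's expected output through to the judge model alongside the input and output. A minimal usage sketch (not part of this diff — the `Case`/`Dataset` wiring and rubric text here are illustrative):

```python
from pydantic_evals import Case, Dataset
from pydantic_evals.evaluators import LLMJudge

# A single case with an expected output; the judge model sees it because
# include_expected_output=True (the flag added in this release).
dataset = Dataset(
    cases=[Case(name='greeting', inputs={'prompt': 'Hello'}, expected_output='Hello world')],
    evaluators=[
        LLMJudge(
            rubric='Output matches the expected output in meaning',
            include_input=True,
            include_expected_output=True,
            model='openai:gpt-4o',
        )
    ],
)
```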
{pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/evals/test_llm_as_a_judge.py

```diff
@@ -11,7 +11,9 @@ with try_import() as imports_successful:
         GradingOutput,
         _stringify,  # pyright: ignore[reportPrivateUsage]
         judge_input_output,
+        judge_input_output_expected,
         judge_output,
+        judge_output_expected,
     )
 
 pytestmark = [pytest.mark.skipif(not imports_successful(), reason='pydantic-evals not installed'), pytest.mark.anyio]
@@ -167,3 +169,114 @@ async def test_judge_input_output_with_model_settings_mock(mocker: MockerFixture):
     assert call_kwargs['model_settings'] == test_model_settings
     # Check if 'model' kwarg is passed, its value will be the default model or None
     assert 'model' in call_kwargs
+
+
+@pytest.mark.anyio
+async def test_judge_input_output_expected_mock(mocker: MockerFixture):
+    """Test judge_input_output_expected function with mocked agent."""
+    # Mock the agent run method
+    mock_result = mocker.MagicMock()
+    mock_result.output = GradingOutput(reason='Test passed', pass_=True, score=1.0)
+    mock_run = mocker.patch('pydantic_ai.Agent.run', return_value=mock_result)
+
+    # Test with string input and output
+    result = await judge_input_output_expected('Hello', 'Hello world', 'Hello', 'Output contains input')
+    assert isinstance(result, GradingOutput)
+    assert result.reason == 'Test passed'
+    assert result.pass_ is True
+    assert result.score == 1.0
+
+    # Verify the agent was called with correct prompt
+    mock_run.assert_called_once()
+    call_args = mock_run.call_args[0]
+    assert '<Input>\nHello\n</Input>' in call_args[0]
+    assert '<ExpectedOutput>\nHello\n</ExpectedOutput>' in call_args[0]
+    assert '<Output>\nHello world\n</Output>' in call_args[0]
+    assert '<Rubric>\nOutput contains input\n</Rubric>' in call_args[0]
+
+
+@pytest.mark.anyio
+async def test_judge_input_output_expected_with_model_settings_mock(mocker: MockerFixture):
+    """Test judge_input_output_expected function with model_settings and mocked agent."""
+    mock_result = mocker.MagicMock()
+    mock_result.output = GradingOutput(reason='Test passed with settings', pass_=True, score=1.0)
+    mock_run = mocker.patch('pydantic_ai.Agent.run', return_value=mock_result)
+
+    test_model_settings = ModelSettings(temperature=1)
+
+    result = await judge_input_output_expected(
+        'Hello settings',
+        'Hello world with settings',
+        'Hello',
+        'Output contains input with settings',
+        model_settings=test_model_settings,
+    )
+    assert isinstance(result, GradingOutput)
+    assert result.reason == 'Test passed with settings'
+    assert result.pass_ is True
+    assert result.score == 1.0
+
+    mock_run.assert_called_once()
+    call_args, call_kwargs = mock_run.call_args
+    assert '<Input>\nHello settings\n</Input>' in call_args[0]
+    assert '<ExpectedOutput>\nHello\n</ExpectedOutput>' in call_args[0]
+    assert '<Output>\nHello world with settings\n</Output>' in call_args[0]
+    assert '<Rubric>\nOutput contains input with settings\n</Rubric>' in call_args[0]
+    assert call_kwargs['model_settings'] == test_model_settings
+    # Check if 'model' kwarg is passed, its value will be the default model or None
+    assert 'model' in call_kwargs
+
+
+@pytest.mark.anyio
+async def test_judge_output_expected_mock(mocker: MockerFixture):
+    """Test judge_output_expected function with mocked agent."""
+    # Mock the agent run method
+    mock_result = mocker.MagicMock()
+    mock_result.output = GradingOutput(reason='Test passed', pass_=True, score=1.0)
+    mock_run = mocker.patch('pydantic_ai.Agent.run', return_value=mock_result)
+
+    # Test with string output and expected output
+    result = await judge_output_expected('Hello world', 'Hello', 'Output contains input')
+    assert isinstance(result, GradingOutput)
+    assert result.reason == 'Test passed'
+    assert result.pass_ is True
+    assert result.score == 1.0
+
+    # Verify the agent was called with correct prompt
+    mock_run.assert_called_once()
+    call_args = mock_run.call_args[0]
+    assert '<Input>' not in call_args[0]
+    assert '<ExpectedOutput>\nHello\n</ExpectedOutput>' in call_args[0]
+    assert '<Output>\nHello world\n</Output>' in call_args[0]
+    assert '<Rubric>\nOutput contains input\n</Rubric>' in call_args[0]
+
+
+@pytest.mark.anyio
+async def test_judge_output_expected_with_model_settings_mock(mocker: MockerFixture):
+    """Test judge_output_expected function with model_settings and mocked agent."""
+    mock_result = mocker.MagicMock()
+    mock_result.output = GradingOutput(reason='Test passed with settings', pass_=True, score=1.0)
+    mock_run = mocker.patch('pydantic_ai.Agent.run', return_value=mock_result)
+
+    test_model_settings = ModelSettings(temperature=1)
+
+    result = await judge_output_expected(
+        'Hello world with settings',
+        'Hello',
+        'Output contains input with settings',
+        model_settings=test_model_settings,
+    )
+    assert isinstance(result, GradingOutput)
+    assert result.reason == 'Test passed with settings'
+    assert result.pass_ is True
+    assert result.score == 1.0
+
+    mock_run.assert_called_once()
+    call_args, call_kwargs = mock_run.call_args
+    assert '<Input>' not in call_args[0]
+    assert '<ExpectedOutput>\nHello\n</ExpectedOutput>' in call_args[0]
+    assert '<Output>\nHello world with settings\n</Output>' in call_args[0]
+    assert '<Rubric>\nOutput contains input with settings\n</Rubric>' in call_args[0]
+    assert call_kwargs['model_settings'] == test_model_settings
+    # Check if 'model' kwarg is passed, its value will be the default model or None
+    assert 'model' in call_kwargs
```
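The new `judge_*_expected` helpers can also be called directly; per the assertions above, the expected output is rendered into an `<ExpectedOutput>` section of the judge prompt. A minimal direct-call sketch (argument order follows the tests above; the rubric text is illustrative):

```python
import asyncio

from pydantic_evals.evaluators.llm_as_a_judge import judge_output_expected


async def main() -> None:
    # Positional order per the tests above: output, expected output, rubric.
    grading = await judge_output_expected('Hello world', 'Hello', 'Output contains the expected output')
    print(grading.pass_, grading.score, grading.reason)


asyncio.run(main())
```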
pydantic_ai-0.2.13/tests/models/cassettes/test_openai/test_compatible_api_with_tool_calls_without_id.yaml (new file)

```diff
@@ -0,0 +1,159 @@
+interactions:
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '326'
+      content-type:
+      - application/json
+      host:
+      - generativelanguage.googleapis.com
+    method: POST
+    parsed_body:
+      messages:
+      - content: What is the current time?
+        role: user
+      model: gemini-2.5-pro-preview-05-06
+      stream: false
+      tool_choice: auto
+      tools:
+      - function:
+          description: Get the current time.
+          name: get_current_time
+          parameters:
+            additionalProperties: false
+            properties: {}
+            type: object
+        type: function
+    uri: https://generativelanguage.googleapis.com/v1beta/openai/chat/completions
+  response:
+    headers:
+      alt-svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      content-length:
+      - '1166'
+      content-type:
+      - application/json
+      server-timing:
+      - gfet4t7; dur=1609
+      transfer-encoding:
+      - chunked
+      vary:
+      - Origin
+      - X-Origin
+      - Referer
+    parsed_body:
+      choices:
+      - finish_reason: tool_calls
+        index: 0
+        message:
+          extra_content:
+            google:
+              thought: true
+              thought_signature: AVSoXO4AzAXs7GGvOY63fp8CwJK3yR8HbUPhxhfN2HaPvJnscmZCkaWvckz5NL3nIMK+si/baQcsM2Q8ME9V1RQrb3w1IKceWfjO3kHPL11odY/p6Us4GkkvJqKU/OgUnbAMbuvNdX1pyXWZUrQ7WyXZ5F4mjbxBSCLiVOTdFlK53zn+ajq5JIuG9AYHgwE/sJxUUpvNd6RcWvZR3fQb8gufjCspiO2ZdInRcdGsz/+XftFHxFbXkdtCRAw74AtjlN5osb+KgDYojdohKIEit9DcTBe7hI7oEHWMfnqYSgGrrad4FJpNB3jXmSFevE2iYYKUBzWvxJNj8fIYrCC0g4rJ1aJvuoU=
+          role: assistant
+          thought_signature: AVSoXO4AzAXs7GGvOY63fp8CwJK3yR8HbUPhxhfN2HaPvJnscmZCkaWvckz5NL3nIMK+si/baQcsM2Q8ME9V1RQrb3w1IKceWfjO3kHPL11odY/p6Us4GkkvJqKU/OgUnbAMbuvNdX1pyXWZUrQ7WyXZ5F4mjbxBSCLiVOTdFlK53zn+ajq5JIuG9AYHgwE/sJxUUpvNd6RcWvZR3fQb8gufjCspiO2ZdInRcdGsz/+XftFHxFbXkdtCRAw74AtjlN5osb+KgDYojdohKIEit9DcTBe7hI7oEHWMfnqYSgGrrad4FJpNB3jXmSFevE2iYYKUBzWvxJNj8fIYrCC0g4rJ1aJvuoU=
+          tool_calls:
+          - function:
+              arguments: '{}'
+              name: get_current_time
+            id: ''
+            type: function
+      created: 1748902365
+      id: 3SE-aKjdCcCEz7IPxpqjCA
+      model: gemini-2.5-pro-preview-05-06
+      object: chat.completion
+      usage:
+        completion_tokens: 12
+        prompt_tokens: 35
+        total_tokens: 109
+    status:
+      code: 200
+      message: OK
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '575'
+      content-type:
+      - application/json
+      host:
+      - generativelanguage.googleapis.com
+    method: POST
+    parsed_body:
+      messages:
+      - content: What is the current time?
+        role: user
+      - role: assistant
+        tool_calls:
+        - function:
+            arguments: '{}'
+            name: get_current_time
+          id: pyd_ai_cee885c699414386a7e14b7ec43cadbc
+          type: function
+      - content: Noon
+        role: tool
+        tool_call_id: pyd_ai_cee885c699414386a7e14b7ec43cadbc
+      model: gemini-2.5-pro-preview-05-06
+      stream: false
+      tool_choice: auto
+      tools:
+      - function:
+          description: Get the current time.
+          name: get_current_time
+          parameters:
+            additionalProperties: false
+            properties: {}
+            type: object
+        type: function
+    uri: https://generativelanguage.googleapis.com/v1beta/openai/chat/completions
+  response:
+    headers:
+      alt-svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      content-length:
+      - '755'
+      content-type:
+      - application/json
+      server-timing:
+      - gfet4t7; dur=1097
+      transfer-encoding:
+      - chunked
+      vary:
+      - Origin
+      - X-Origin
+      - Referer
+    parsed_body:
+      choices:
+      - finish_reason: stop
+        index: 0
+        message:
+          content: The current time is Noon.
+          extra_content:
+            google:
+              thought: true
+              thought_signature: AVSoXO4/lu90Bn7IxVcWAjD6KH3ZHMmsCX1tnPJERDI6SZb63hrSEtmJT/v+sn2SzlecMoXBVmtcrd3keFszUgDpLjFm1gB+uMzLS1IqPdEAh+m5S71k1hfStNMFen63UnphYHWt4UrjVHXckysRLVJjCuMmE01hQXcVh9b3YXvfWfZEFA==
+          role: assistant
+          thought_signature: AVSoXO4/lu90Bn7IxVcWAjD6KH3ZHMmsCX1tnPJERDI6SZb63hrSEtmJT/v+sn2SzlecMoXBVmtcrd3keFszUgDpLjFm1gB+uMzLS1IqPdEAh+m5S71k1hfStNMFen63UnphYHWt4UrjVHXckysRLVJjCuMmE01hQXcVh9b3YXvfWfZEFA==
+      created: 1748902366
+      id: 3iE-aNK3EIGJz7IPt_mYoAs
+      model: gemini-2.5-pro-preview-05-06
+      object: chat.completion
+      usage:
+        completion_tokens: 6
+        prompt_tokens: 66
+        total_tokens: 100
+    status:
+      code: 200
+      message: OK
+version: 1
```
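This new cassette records an OpenAI-compatible server (here, Gemini's OpenAI compatibility endpoint) returning a tool call with an empty `id`; note that the follow-up request carries a generated `pyd_ai_…` tool_call_id in its place. A sketch of the kind of setup the cassette corresponds to (the API key is a placeholder):

```python
from pydantic_ai import Agent
from pydantic_ai.models.openai import OpenAIModel
from pydantic_ai.providers.openai import OpenAIProvider

# Drive Gemini through its OpenAI-compatible chat/completions endpoint,
# as the recorded requests above do.
model = OpenAIModel(
    'gemini-2.5-pro-preview-05-06',
    provider=OpenAIProvider(
        base_url='https://generativelanguage.googleapis.com/v1beta/openai/',
        api_key='your-gemini-api-key',  # placeholder
    ),
)
agent = Agent(model)
```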
{pydantic_ai-0.2.11 → pydantic_ai-0.2.13}/tests/models/test_bedrock.py

```diff
@@ -31,6 +31,8 @@ from pydantic_ai.messages import (
     UserPromptPart,
     VideoUrl,
 )
+from pydantic_ai.models import ModelRequestParameters
+from pydantic_ai.tools import ToolDefinition
 from pydantic_ai.usage import Usage
 
 from ..conftest import IsDatetime, try_import
@@ -631,3 +633,96 @@ async def test_bedrock_group_consecutive_tool_return_parts(bedrock_provider: BedrockProvider):
             },
         ]
     )
+
+
+async def test_bedrock_mistral_tool_result_format(bedrock_provider: BedrockProvider):
+    now = datetime.datetime.now()
+    req = [
+        ModelRequest(
+            parts=[
+                ToolReturnPart(tool_name='tool1', content={'foo': 'bar'}, tool_call_id='id1', timestamp=now),
+            ]
+        ),
+    ]
+
+    # Models other than Mistral support toolResult.content with text, not json
+    model = BedrockConverseModel('us.amazon.nova-micro-v1:0', provider=bedrock_provider)
+    # Call the mapping function directly
+    _, bedrock_messages = await model._map_messages(req)  # type: ignore[reportPrivateUsage]
+
+    assert bedrock_messages == snapshot(
+        [
+            {
+                'role': 'user',
+                'content': [
+                    {'toolResult': {'toolUseId': 'id1', 'content': [{'text': '{"foo":"bar"}'}], 'status': 'success'}},
+                ],
+            },
+        ]
+    )
+
+    # Mistral requires toolResult.content to hold json, not text
+    model = BedrockConverseModel('mistral.mistral-7b-instruct-v0:2', provider=bedrock_provider)
+    # Call the mapping function directly
+    _, bedrock_messages = await model._map_messages(req)  # type: ignore[reportPrivateUsage]
+
+    assert bedrock_messages == snapshot(
+        [
+            {
+                'role': 'user',
+                'content': [
+                    {'toolResult': {'toolUseId': 'id1', 'content': [{'json': {'foo': 'bar'}}], 'status': 'success'}},
+                ],
+            },
+        ]
+    )
+
+
+async def test_bedrock_anthropic_no_tool_choice(bedrock_provider: BedrockProvider):
+    my_tool = ToolDefinition(
+        'my_tool',
+        'This is my tool',
+        {'type': 'object', 'title': 'Result', 'properties': {'spam': {'type': 'number'}}},
+    )
+    mrp = ModelRequestParameters(function_tools=[my_tool], allow_text_output=False, output_tools=[])
+
+    # Models other than Anthropic support tool_choice
+    model = BedrockConverseModel('us.amazon.nova-micro-v1:0', provider=bedrock_provider)
+    tool_config = model._map_tool_config(mrp)  # type: ignore[reportPrivateUsage]
+
+    assert tool_config == snapshot(
+        {
+            'tools': [
+                {
+                    'toolSpec': {
+                        'name': 'my_tool',
+                        'description': 'This is my tool',
+                        'inputSchema': {
+                            'json': {'type': 'object', 'title': 'Result', 'properties': {'spam': {'type': 'number'}}}
+                        },
+                    }
+                }
+            ],
+            'toolChoice': {'any': {}},
+        }
+    )
+
+    # Anthropic models don't support tool_choice
+    model = BedrockConverseModel('us.anthropic.claude-3-7-sonnet-20250219-v1:0', provider=bedrock_provider)
+    tool_config = model._map_tool_config(mrp)  # type: ignore[reportPrivateUsage]
+
+    assert tool_config == snapshot(
+        {
+            'tools': [
+                {
+                    'toolSpec': {
+                        'name': 'my_tool',
+                        'description': 'This is my tool',
+                        'inputSchema': {
+                            'json': {'type': 'object', 'title': 'Result', 'properties': {'spam': {'type': 'number'}}}
+                        },
+                    }
+                }
+            ]
+        }
+    )
```