pydantic-ai 1.6.0__tar.gz → 1.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pydantic-ai might be problematic. Click here for more details.
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/PKG-INFO +6 -6
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/README.md +1 -1
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/pyproject.toml +5 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_settings/test_stop_settings[anthropic].yaml +2 -2
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/conftest.py +13 -1
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/evals/test_dataset.py +24 -1
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/evals/test_reporting.py +412 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_anthropic_model_empty_message_on_history.yaml +2 -2
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_anthropic_model_thinking_part.yaml +4 -4
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_anthropic_model_thinking_part_redacted.yaml +4 -4
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_anthropic_model_thinking_part_redacted_stream.yaml +2 -2
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_anthropic_model_thinking_part_stream.yaml +2 -2
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_anthropic_prompted_output.yaml +4 -4
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_anthropic_prompted_output_multiple.yaml +2 -2
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_anthropic_server_tool_pass_history_to_another_provider.yaml +2 -2
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_anthropic_text_output_function.yaml +4 -4
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_anthropic_text_parts_ahead_of_built_in_tool_call.yaml +8 -8
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_anthropic_tool_output.yaml +4 -4
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_document_binary_content_input.yaml +2 -2
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_document_url_input.yaml +2 -2
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_extra_headers.yaml +2 -2
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_image_as_binary_content_tool_response.yaml +4 -4
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_image_url_input.yaml +2 -2
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_image_url_input_invalid_mime_type.yaml +2 -2
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_multiple_parallel_tool_calls.yaml +4 -4
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_text_document_url_input.yaml +2 -2
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/test_anthropic.py +52 -52
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/test_model.py +9 -9
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/test_openai.py +27 -1
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/test_openai_responses.py +2 -2
- pydantic_ai-1.8.0/tests/models/test_outlines.py +779 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/cassettes/test_gateway/test_gateway_provider_with_anthropic.yaml +2 -2
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/test_gateway.py +2 -2
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/test_litellm.py +1 -1
- pydantic_ai-1.8.0/tests/providers/test_outlines.py +36 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/test_provider_names.py +2 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/test_vercel.py +4 -4
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_builtin_tools.py +4 -4
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_live.py +1 -1
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_streaming.py +1 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/.gitignore +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/LICENSE +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/Makefile +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/__init__.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/assets/dummy.pdf +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/assets/dummy.txt +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/assets/kiwi.png +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/assets/marcelo.mp3 +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/assets/product_name.txt +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/assets/small_video.mp4 +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_dbos/test_complex_agent_run.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_dbos/test_complex_agent_run_in_workflow.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_dbos/test_complex_agent_run_stream_in_workflow.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_dbos/test_dbos_agent_iter.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_dbos/test_dbos_agent_iter_in_workflow.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_dbos/test_dbos_agent_override_deps_in_workflow.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_dbos/test_dbos_agent_override_tools_in_workflow.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_dbos/test_dbos_agent_run.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_dbos/test_dbos_agent_run_in_workflow_with_toolsets.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_dbos/test_dbos_agent_run_stream.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_dbos/test_dbos_agent_run_sync.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_dbos/test_dbos_agent_run_sync_in_workflow.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_dbos/test_dbos_agent_with_dataclass_deps_as_dict.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_dbos/test_dbos_agent_with_hitl_tool.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_dbos/test_dbos_agent_with_hitl_tool_sync.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_dbos/test_dbos_agent_with_model_retry.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_dbos/test_dbos_agent_with_non_dict_deps.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_dbos/test_dbos_agent_with_unserializable_deps_type.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_dbos/test_dbos_model_stream_direct.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_dbos/test_multiple_agents.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_dbos/test_simple_agent_run_in_workflow.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_mcp/test_agent_with_server_not_running.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_mcp/test_agent_with_stdio_server.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_mcp/test_tool_returning_audio_resource.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_mcp/test_tool_returning_audio_resource_link.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_mcp/test_tool_returning_dict.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_mcp/test_tool_returning_error.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_mcp/test_tool_returning_image.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_mcp/test_tool_returning_image_resource.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_mcp/test_tool_returning_image_resource_link.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_mcp/test_tool_returning_multiple_items.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_mcp/test_tool_returning_none.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_mcp/test_tool_returning_str.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_mcp/test_tool_returning_text_resource.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_mcp/test_tool_returning_text_resource_link.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_mcp/test_tool_returning_unstructured_dict.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_prefect/test_complex_agent_run_in_flow.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_prefect/test_iter_in_flow.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_prefect/test_multiple_agents.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_prefect/test_prefect_agent_iter.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_prefect/test_prefect_agent_override_deps.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_prefect/test_prefect_agent_override_tools.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_prefect/test_prefect_agent_override_toolsets.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_prefect/test_prefect_agent_run.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_prefect/test_prefect_agent_run_stream.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_prefect/test_prefect_agent_run_stream_events.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_prefect/test_prefect_agent_run_sync.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_prefect/test_prefect_agent_with_hitl_tool.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_prefect/test_prefect_agent_with_hitl_tool_sync.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_prefect/test_prefect_agent_with_model_retry.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_prefect/test_prefect_agent_with_unserializable_deps.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_prefect/test_run_stream_in_flow.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_prefect/test_run_sync_in_flow.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_prefect/test_simple_agent_run_in_flow.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_settings/test_stop_settings[bedrock].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_settings/test_stop_settings[cohere].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_settings/test_stop_settings[gemini].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_settings/test_stop_settings[google].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_settings/test_stop_settings[groq].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_settings/test_stop_settings[mistral].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_settings/test_stop_settings[openai].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_temporal/test_complex_agent_run.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_temporal/test_complex_agent_run_in_workflow.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_temporal/test_complex_agent_run_stream_in_workflow.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_temporal/test_logfire_plugin.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_temporal/test_multiple_agents.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_temporal/test_simple_agent_run_in_workflow.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_temporal/test_temporal_agent_iter.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_temporal/test_temporal_agent_override_deps_in_workflow.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_temporal/test_temporal_agent_run.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_temporal/test_temporal_agent_run_stream.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_temporal/test_temporal_agent_run_stream_events.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_temporal/test_temporal_agent_run_sync.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_temporal/test_temporal_agent_sync_tool_activity_disabled.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_temporal/test_temporal_agent_with_dataclass_deps_as_dict.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_temporal/test_temporal_agent_with_hitl_tool.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_temporal/test_temporal_agent_with_model_retry.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_temporal/test_temporal_agent_with_non_dict_deps.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_temporal/test_temporal_agent_with_unserializable_deps_type.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/cassettes/test_temporal/test_web_search_agent_run_in_workflow.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/evals/__init__.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/evals/test_evaluator_base.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/evals/test_evaluator_common.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/evals/test_evaluator_context.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/evals/test_evaluator_spec.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/evals/test_evaluators.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/evals/test_llm_as_a_judge.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/evals/test_otel.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/evals/test_render_numbers.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/evals/test_reports.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/evals/test_utils.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/evals/utils.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/example_modules/README.md +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/example_modules/bank_database.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/example_modules/fake_database.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/example_modules/mcp_server.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/example_modules/weather_service.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/ext/__init__.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/ext/test_langchain.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/graph/__init__.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/graph/beta/__init__.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/graph/beta/test_broadcast_and_spread.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/graph/beta/test_decisions.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/graph/beta/test_edge_cases.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/graph/beta/test_edge_labels.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/graph/beta/test_graph_builder.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/graph/beta/test_graph_edge_cases.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/graph/beta/test_graph_execution.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/graph/beta/test_graph_iteration.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/graph/beta/test_joins_and_reducers.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/graph/beta/test_mermaid_rendering.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/graph/beta/test_node_and_step.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/graph/beta/test_parent_forks.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/graph/beta/test_paths.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/graph/beta/test_util.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/graph/beta/test_v1_v2_integration.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/graph/test_file_persistence.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/graph/test_graph.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/graph/test_mermaid.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/graph/test_persistence.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/graph/test_state.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/graph/test_utils.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/import_examples.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/json_body_serializer.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/mcp_server.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/__init__.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_anthropic_code_execution_tool.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_anthropic_code_execution_tool_stream.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_anthropic_mcp_servers.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_anthropic_mcp_servers_stream.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_anthropic_memory_tool.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_anthropic_model_instructions.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_anthropic_model_thinking_part_from_other_model.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_anthropic_model_web_search_tool_stream.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_anthropic_output_tool_with_thinking.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_anthropic_server_tool_receive_history_from_another_provider.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_anthropic_tool_with_thinking.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_anthropic_web_search_tool.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_anthropic/test_anthropic_web_search_tool_stream.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_bedrock_anthropic_tool_with_thinking.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_bedrock_empty_system_prompt.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_bedrock_model.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_anthropic_model_without_tools.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_guardrail_config.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_instructions.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_iter_stream.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_max_tokens.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_other_parameters.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_performance_config.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_retry.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_stream.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_stream_empty_text_delta.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_structured_output.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_thinking_part_anthropic.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_thinking_part_deepseek.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_thinking_part_from_other_model.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_thinking_part_redacted.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_thinking_part_redacted_stream.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_thinking_part_stream.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_bedrock_model_top_p.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_bedrock_multiple_documents_in_history.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_document_url_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_image_url_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_text_as_binary_content_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_text_document_url_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_video_as_binary_content_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_bedrock/test_video_url_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_cohere/test_cohere_model_instructions.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_cohere/test_cohere_model_thinking_part.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_cohere/test_request_simple_success_with_vcr.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_deepseek/test_deepseek_model_thinking_part.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_deepseek/test_deepseek_model_thinking_stream.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_download_item/test_download_item_application_octet_stream.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_download_item/test_download_item_audio_mpeg.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_download_item/test_download_item_no_content_type.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini/test_document_url_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini/test_gemini_additional_properties_is_false.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini/test_gemini_additional_properties_is_true.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini/test_gemini_drop_exclusive_maximum.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini/test_gemini_exclusive_minimum_and_maximum.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini/test_gemini_model_instructions.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini/test_gemini_model_thinking_part.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini/test_gemini_native_output.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini/test_gemini_native_output_multiple.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini/test_gemini_prompted_output.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini/test_gemini_prompted_output_multiple.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini/test_gemini_prompted_output_with_tools.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini/test_gemini_text_output_function.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini/test_gemini_tool_config_any_with_tool_without_args.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini/test_gemini_tool_output.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini/test_gemini_youtube_video_url_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini/test_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini/test_image_as_binary_content_tool_response.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini/test_image_url_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini/test_labels_are_ignored_with_gla_provider.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini/test_video_as_binary_content_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini/test_video_url_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini_vertex/test_labels.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini_vertex/test_url_input[AudioUrl (gs)].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini_vertex/test_url_input[AudioUrl].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini_vertex/test_url_input[DocumentUrl (gs)].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini_vertex/test_url_input[DocumentUrl].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini_vertex/test_url_input[ImageUrl (gs)].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini_vertex/test_url_input[ImageUrl].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini_vertex/test_url_input[VideoUrl (YouTube)].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini_vertex/test_url_input[VideoUrl (gs)].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini_vertex/test_url_input[VideoUrl].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_gemini_vertex/test_url_input_force_download.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_builtin_tools_with_other_tools.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_httpx_client_is_not_closed.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_image_and_text_output.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_image_generation.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_image_generation_stream.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_image_generation_with_text.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_image_or_text_output.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_builtin_code_execution_stream.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_code_execution_tool.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_document_url_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_empty_assistant_response.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_empty_user_prompt.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_image_url_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_instructions.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_iter_stream.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_max_tokens.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_multiple_documents_in_history.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_receive_web_search_history_from_another_provider.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_retry.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_safety_settings.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_server_tool_receive_history_from_another_provider.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_stream.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_structured_output.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_text_as_binary_content_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_text_document_url_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_thinking_config.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_thinking_part.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_thinking_part_from_other_model.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_thinking_part_iter.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_top_p.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_url_context_tool.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_usage_limit_exceeded.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_usage_limit_not_exceeded.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_vertex_labels.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_vertex_provider.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_video_as_binary_content_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_video_as_binary_content_input_with_vendor_metadata.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_video_url_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_web_search_tool.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_web_search_tool_stream.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_model_youtube_video_url_input_with_vendor_metadata.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_multiple_images.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_native_output.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_native_output_multiple.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_prompted_output.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_prompted_output_multiple.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_prompted_output_with_tools.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_text_output_function.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_timeout.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_tool_config_any_with_tool_without_args.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_tool_output.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_url_input[AudioUrl (gs)].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_url_input[AudioUrl].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_url_input[DocumentUrl (gs)].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_url_input[DocumentUrl].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_url_input[ImageUrl (gs)].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_url_input[ImageUrl].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_url_input[VideoUrl (YouTube)].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_url_input[VideoUrl (gs)].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_url_input[VideoUrl].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_url_input_force_download.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_vertexai_image_generation.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_google/test_google_vertexai_model_usage_limit_exceeded.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_groq/test_extra_headers.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_groq/test_groq_model_instructions.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_groq/test_groq_model_thinking_part.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_groq/test_groq_model_thinking_part_iter.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_groq/test_groq_model_web_search_tool.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_groq/test_groq_model_web_search_tool_stream.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_groq/test_groq_native_output.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_groq/test_groq_prompted_output.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_groq/test_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_groq/test_image_as_binary_content_tool_response.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_groq/test_image_url_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_groq/test_tool_regular_error.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_groq/test_tool_use_failed_error.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_groq/test_tool_use_failed_error_streaming.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_huggingface/test_hf_model_instructions.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_huggingface/test_hf_model_thinking_part.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_huggingface/test_hf_model_thinking_part_iter.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_huggingface/test_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_huggingface/test_image_url_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_huggingface/test_max_completion_tokens[Qwen-Qwen2.5-72B-Instruct].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_huggingface/test_max_completion_tokens[deepseek-ai-DeepSeek-R1-0528].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_huggingface/test_max_completion_tokens[meta-llama-Llama-3.3-70B-Instruct].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_huggingface/test_request_simple_success_with_vcr.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_huggingface/test_request_simple_usage.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_huggingface/test_simple_completion.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_huggingface/test_stream_completion.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_mistral/test_image_as_binary_content_tool_response.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_mistral/test_mistral_model_instructions.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_mistral/test_mistral_model_thinking_part.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_mistral/test_mistral_model_thinking_part_iter.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_model_names/test_known_model_names.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_audio_as_binary_content_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_compatible_api_with_tool_calls_without_id.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_document_as_binary_content_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_document_as_binary_content_input_with_tool.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_document_url_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_extra_headers.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_image_as_binary_content_tool_response.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_image_url_input_force_download.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_image_url_input_force_download_response_api.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_image_url_tool_response.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_invalid_response.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_max_completion_tokens[gpt-4.5-preview].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_max_completion_tokens[gpt-4o-mini].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_max_completion_tokens[o3-mini].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_multiple_agent_tool_calls.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_openai_audio_url_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_openai_instructions.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_openai_instructions_with_tool_calls_keep_instructions.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_openai_model_cerebras_provider.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_openai_model_cerebras_provider_harmony.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_openai_model_cerebras_provider_qwen_3_coder.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_openai_model_settings_temperature_ignored_on_gpt_5.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_openai_model_thinking_part.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_openai_model_without_system_prompt.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_openai_native_output.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_openai_native_output_multiple.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_openai_o1_mini_system_role[developer].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_openai_o1_mini_system_role[system].yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_openai_prompted_output.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_openai_prompted_output_multiple.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_openai_text_output_function.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_openai_tool_output.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_openai_web_search_tool.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_openai_web_search_tool_model_not_supported.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_openai_web_search_tool_with_user_location.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_reasoning_model_with_temperature.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_system_prompt_role_o1_mini.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_text_document_as_binary_content_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_text_document_url_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_text_response.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_user_id.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai/test_valid_response.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_audio_as_binary_content_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_image_as_binary_content_tool_response.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_native_output.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_native_output_multiple.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_previous_response_id.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_previous_response_id_auto_mode.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_code_execution_return_image.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_code_execution_return_image_stream.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_document_as_binary_content_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_document_url_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_history_with_combined_tool_call_id.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_image_and_text_output.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_image_generation.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_image_generation_jpeg.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_image_generation_stream.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_image_generation_tool_without_image_output.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_image_generation_with_native_output.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_image_generation_with_prompted_output.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_image_generation_with_tool_output.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_image_generation_with_tools.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_image_or_text_output.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_image_url_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_builtin_tools.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_builtin_tools_web_search.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_http_error.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_instructions.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_mcp_server_tool.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_mcp_server_tool_stream.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_mcp_server_tool_with_connector.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_retry.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_simple_response.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_simple_response_with_tool_call.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_thinking_part.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_web_search_tool.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_web_search_tool_stream.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_web_search_tool_with_invalid_region.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_web_search_tool_with_user_location.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_multiple_images.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_non_reasoning_model_no_item_ids.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_output_type.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_reasoning_effort.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_reasoning_generate_summary.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_requires_function_call_status_none.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_stream.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_streaming_usage.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_system_prompt.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_text_document_url_input.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_thinking_part_from_other_model.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_thinking_part_iter.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_thinking_with_code_execution_tool.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_thinking_with_code_execution_tool_stream.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_thinking_with_modified_history.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_thinking_with_tool_calls.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_openai_responses_verbosity.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_prompted_output.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_prompted_output_multiple.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_reasoning_model_with_temperature.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_text_output_function.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/cassettes/test_openai_responses/test_tool_output.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/mock_async_stream.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/mock_openai.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/test_bedrock.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/test_cohere.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/test_deepseek.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/test_download_item.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/test_fallback.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/test_gemini.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/test_gemini_vertex.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/test_google.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/test_groq.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/test_huggingface.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/test_instrumented.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/test_mcp_sampling.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/test_mistral.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/test_model_function.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/test_model_names.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/test_model_request_parameters.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/test_model_settings.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/models/test_model_test.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/parts_from_messages.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/__init__.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/cassettes/test_azure/test_azure_provider_call.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/cassettes/test_gateway/test_gateway_provider_with_bedrock.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/cassettes/test_gateway/test_gateway_provider_with_google_vertex.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/cassettes/test_gateway/test_gateway_provider_with_groq.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/cassettes/test_gateway/test_gateway_provider_with_openai.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/cassettes/test_gateway/test_gateway_provider_with_openai_responses.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/cassettes/test_google_vertex/test_vertexai_provider.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/cassettes/test_heroku/test_heroku_model_provider_claude_3_7_sonnet.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/cassettes/test_openrouter/test_openrouter_with_google_model.yaml +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/test_anthropic.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/test_azure.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/test_bedrock.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/test_cerebras.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/test_cohere.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/test_deepseek.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/test_fireworks.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/test_github.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/test_google_gla.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/test_google_vertex.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/test_grok.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/test_groq.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/test_heroku.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/test_huggingface.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/test_mistral.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/test_moonshotai.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/test_nebius.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/test_ollama.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/test_openai.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/test_openrouter.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/test_ovhcloud.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/providers/test_together.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_a2a.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_ag_ui.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_agent.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_cli.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_dbos.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_deps.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_direct.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_examples.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_fastmcp.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_format_as_xml.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_function_schema.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_history_processor.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_json_body_serializer.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_logfire.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_mcp.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_messages.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_parts_manager.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_prefect.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_settings.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_temporal.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_tenacity.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_thinking_part.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_tools.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_toolsets.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_usage_limits.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/test_utils.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/typed_agent.py +0 -0
- {pydantic_ai-1.6.0 → pydantic_ai-1.8.0}/tests/typed_graph.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pydantic-ai
|
|
3
|
-
Version: 1.6.0
|
|
3
|
+
Version: 1.8.0
|
|
4
4
|
Summary: Agent Framework / shim to use Pydantic with LLMs
|
|
5
5
|
Project-URL: Homepage, https://ai.pydantic.dev
|
|
6
6
|
Project-URL: Source, https://github.com/pydantic/pydantic-ai
|
|
@@ -26,15 +26,15 @@ Classifier: Topic :: Internet
|
|
|
26
26
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
27
27
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
28
28
|
Requires-Python: >=3.10
|
|
29
|
-
Requires-Dist: pydantic-ai-slim[ag-ui,anthropic,bedrock,cli,cohere,evals,fastmcp,google,groq,huggingface,logfire,mcp,mistral,openai,retries,temporal,vertexai]==1.6.0
|
|
29
|
+
Requires-Dist: pydantic-ai-slim[ag-ui,anthropic,bedrock,cli,cohere,evals,fastmcp,google,groq,huggingface,logfire,mcp,mistral,openai,retries,temporal,vertexai]==1.8.0
|
|
30
30
|
Provides-Extra: a2a
|
|
31
31
|
Requires-Dist: fasta2a>=0.4.1; extra == 'a2a'
|
|
32
32
|
Provides-Extra: dbos
|
|
33
|
-
Requires-Dist: pydantic-ai-slim[dbos]==1.6.0; extra == 'dbos'
|
|
33
|
+
Requires-Dist: pydantic-ai-slim[dbos]==1.8.0; extra == 'dbos'
|
|
34
34
|
Provides-Extra: examples
|
|
35
|
-
Requires-Dist: pydantic-ai-examples==1.6.0; extra == 'examples'
|
|
35
|
+
Requires-Dist: pydantic-ai-examples==1.8.0; extra == 'examples'
|
|
36
36
|
Provides-Extra: prefect
|
|
37
|
-
Requires-Dist: pydantic-ai-slim[prefect]==1.6.0; extra == 'prefect'
|
|
37
|
+
Requires-Dist: pydantic-ai-slim[prefect]==1.8.0; extra == 'prefect'
|
|
38
38
|
Description-Content-Type: text/markdown
|
|
39
39
|
|
|
40
40
|
<div align="center">
|
|
@@ -78,7 +78,7 @@ We built Pydantic AI with one simple aim: to bring that FastAPI feeling to GenAI
|
|
|
78
78
|
[Pydantic Validation](https://docs.pydantic.dev/latest/) is the validation layer of the OpenAI SDK, the Google ADK, the Anthropic SDK, LangChain, LlamaIndex, AutoGPT, Transformers, CrewAI, Instructor and many more. _Why use the derivative when you can go straight to the source?_ :smiley:
|
|
79
79
|
|
|
80
80
|
2. **Model-agnostic**:
|
|
81
|
-
Supports virtually every [model](https://ai.pydantic.dev/models/overview) and provider: OpenAI, Anthropic, Gemini, DeepSeek, Grok, Cohere, Mistral, and Perplexity; Azure AI Foundry, Amazon Bedrock, Google Vertex AI, Ollama, LiteLLM, Groq, OpenRouter, Together AI, Fireworks AI, Cerebras, Hugging Face, GitHub, Heroku, Vercel, Nebius, OVHcloud. If your favorite model or provider is not listed, you can easily implement a [custom model](https://ai.pydantic.dev/models/overview#custom-models).
|
|
81
|
+
Supports virtually every [model](https://ai.pydantic.dev/models/overview) and provider: OpenAI, Anthropic, Gemini, DeepSeek, Grok, Cohere, Mistral, and Perplexity; Azure AI Foundry, Amazon Bedrock, Google Vertex AI, Ollama, LiteLLM, Groq, OpenRouter, Together AI, Fireworks AI, Cerebras, Hugging Face, GitHub, Heroku, Vercel, Nebius, OVHcloud, and Outlines. If your favorite model or provider is not listed, you can easily implement a [custom model](https://ai.pydantic.dev/models/overview#custom-models).
|
|
82
82
|
|
|
83
83
|
3. **Seamless Observability**:
|
|
84
84
|
Tightly [integrates](https://ai.pydantic.dev/logfire) with [Pydantic Logfire](https://pydantic.dev/logfire), our general-purpose OpenTelemetry observability platform, for real-time debugging, evals-based performance monitoring, and behavior, tracing, and cost tracking. If you already have an observability platform that supports OTel, you can [use that too](https://ai.pydantic.dev/logfire#alternative-observability-backends).
|
|
@@ -39,7 +39,7 @@ We built Pydantic AI with one simple aim: to bring that FastAPI feeling to GenAI
|
|
|
39
39
|
[Pydantic Validation](https://docs.pydantic.dev/latest/) is the validation layer of the OpenAI SDK, the Google ADK, the Anthropic SDK, LangChain, LlamaIndex, AutoGPT, Transformers, CrewAI, Instructor and many more. _Why use the derivative when you can go straight to the source?_ :smiley:
|
|
40
40
|
|
|
41
41
|
2. **Model-agnostic**:
|
|
42
|
-
Supports virtually every [model](https://ai.pydantic.dev/models/overview) and provider: OpenAI, Anthropic, Gemini, DeepSeek, Grok, Cohere, Mistral, and Perplexity; Azure AI Foundry, Amazon Bedrock, Google Vertex AI, Ollama, LiteLLM, Groq, OpenRouter, Together AI, Fireworks AI, Cerebras, Hugging Face, GitHub, Heroku, Vercel, Nebius, OVHcloud. If your favorite model or provider is not listed, you can easily implement a [custom model](https://ai.pydantic.dev/models/overview#custom-models).
|
|
42
|
+
Supports virtually every [model](https://ai.pydantic.dev/models/overview) and provider: OpenAI, Anthropic, Gemini, DeepSeek, Grok, Cohere, Mistral, and Perplexity; Azure AI Foundry, Amazon Bedrock, Google Vertex AI, Ollama, LiteLLM, Groq, OpenRouter, Together AI, Fireworks AI, Cerebras, Hugging Face, GitHub, Heroku, Vercel, Nebius, OVHcloud, and Outlines. If your favorite model or provider is not listed, you can easily implement a [custom model](https://ai.pydantic.dev/models/overview#custom-models).
|
|
43
43
|
|
|
44
44
|
3. **Seamless Observability**:
|
|
45
45
|
Tightly [integrates](https://ai.pydantic.dev/logfire) with [Pydantic Logfire](https://pydantic.dev/logfire), our general-purpose OpenTelemetry observability platform, for real-time debugging, evals-based performance monitoring, and behavior, tracing, and cost tracking. If you already have an observability platform that supports OTel, you can [use that too](https://ai.pydantic.dev/logfire#alternative-observability-backends).
|
|
@@ -105,6 +105,11 @@ dev = [
|
|
|
105
105
|
"pip>=25.2",
|
|
106
106
|
"genai-prices>=0.0.28",
|
|
107
107
|
"mcp-run-python>=0.0.20",
|
|
108
|
+
# Needed to test Outlines (not included in the default installation)
|
|
109
|
+
"pydantic-ai-slim[outlines-transformers]",
|
|
110
|
+
"pydantic-ai-slim[outlines-llamacpp]",
|
|
111
|
+
"pydantic-ai-slim[outlines-mlxlm]",
|
|
112
|
+
"pydantic-ai-slim[outlines-sglang]",
|
|
108
113
|
]
|
|
109
114
|
lint = ["mypy>=1.11.2", "pyright>=1.1.390", "ruff>=0.6.9"]
|
|
110
115
|
docs = [
|
|
@@ -21,7 +21,7 @@ interactions:
|
|
|
21
21
|
- text: What is the capital of France? Give me an answer that contains the word "Paris", but is not the first word.
|
|
22
22
|
type: text
|
|
23
23
|
role: user
|
|
24
|
-
model: claude-
|
|
24
|
+
model: claude-sonnet-4-5
|
|
25
25
|
stop_sequences:
|
|
26
26
|
- Paris
|
|
27
27
|
stream: false
|
|
@@ -43,7 +43,7 @@ interactions:
|
|
|
43
43
|
- text: 'The beautiful city of '
|
|
44
44
|
type: text
|
|
45
45
|
id: msg_01376yZQxHcw9pER2Ab2SvQb
|
|
46
|
-
model: claude-
|
|
46
|
+
model: claude-sonnet-4-5-20250929
|
|
47
47
|
role: assistant
|
|
48
48
|
stop_reason: stop_sequence
|
|
49
49
|
stop_sequence: Paris
|
|
@@ -498,7 +498,7 @@ def model(
|
|
|
498
498
|
from pydantic_ai.models.anthropic import AnthropicModel
|
|
499
499
|
from pydantic_ai.providers.anthropic import AnthropicProvider
|
|
500
500
|
|
|
501
|
-
return AnthropicModel('claude-
|
|
501
|
+
return AnthropicModel('claude-sonnet-4-5', provider=AnthropicProvider(api_key=anthropic_api_key))
|
|
502
502
|
elif request.param == 'mistral':
|
|
503
503
|
from pydantic_ai.models.mistral import MistralModel
|
|
504
504
|
from pydantic_ai.providers.mistral import MistralProvider
|
|
@@ -536,6 +536,18 @@ def model(
|
|
|
536
536
|
'Qwen/Qwen2.5-72B-Instruct',
|
|
537
537
|
provider=HuggingFaceProvider(provider_name='nebius', api_key=huggingface_api_key),
|
|
538
538
|
)
|
|
539
|
+
elif request.param == 'outlines':
|
|
540
|
+
from outlines.models.transformers import from_transformers
|
|
541
|
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
542
|
+
|
|
543
|
+
from pydantic_ai.models.outlines import OutlinesModel
|
|
544
|
+
|
|
545
|
+
return OutlinesModel(
|
|
546
|
+
from_transformers(
|
|
547
|
+
AutoModelForCausalLM.from_pretrained('erwanf/gpt2-mini'),
|
|
548
|
+
AutoTokenizer.from_pretrained('erwanf/gpt2-mini'),
|
|
549
|
+
)
|
|
550
|
+
)
|
|
539
551
|
else:
|
|
540
552
|
raise ValueError(f'Unknown model: {request.param}')
|
|
541
553
|
except ImportError:
|
|
@@ -1530,7 +1530,7 @@ async def test_evaluate_async_logfire(
|
|
|
1530
1530
|
return TaskOutput(answer='Paris')
|
|
1531
1531
|
return TaskOutput(answer='Unknown') # pragma: no cover
|
|
1532
1532
|
|
|
1533
|
-
await example_dataset.evaluate(mock_async_task)
|
|
1533
|
+
await example_dataset.evaluate(mock_async_task, metadata={'key': 'value'})
|
|
1534
1534
|
|
|
1535
1535
|
spans = capfire.exporter.exported_spans_as_dict(parse_json_attributes=True)
|
|
1536
1536
|
spans.sort(key=lambda s: s['start_time'])
|
|
@@ -1556,6 +1556,7 @@ async def test_evaluate_async_logfire(
|
|
|
1556
1556
|
'gen_ai.operation.name': {},
|
|
1557
1557
|
'n_cases': {},
|
|
1558
1558
|
'name': {},
|
|
1559
|
+
'metadata': {'type': 'object'},
|
|
1559
1560
|
'logfire.experiment.metadata': {
|
|
1560
1561
|
'type': 'object',
|
|
1561
1562
|
'properties': {
|
|
@@ -1571,11 +1572,13 @@ async def test_evaluate_async_logfire(
|
|
|
1571
1572
|
'type': 'object',
|
|
1572
1573
|
},
|
|
1573
1574
|
'logfire.msg': 'evaluate mock_async_task',
|
|
1575
|
+
'metadata': {'key': 'value'},
|
|
1574
1576
|
'logfire.msg_template': 'evaluate {name}',
|
|
1575
1577
|
'logfire.span_type': 'span',
|
|
1576
1578
|
'n_cases': 2,
|
|
1577
1579
|
'logfire.experiment.metadata': {
|
|
1578
1580
|
'n_cases': 2,
|
|
1581
|
+
'metadata': {'key': 'value'},
|
|
1579
1582
|
'averages': {
|
|
1580
1583
|
'name': 'Averages',
|
|
1581
1584
|
'scores': {'confidence': 1.0},
|
|
@@ -1750,3 +1753,23 @@ async def test_evaluate_async_logfire(
|
|
|
1750
1753
|
),
|
|
1751
1754
|
]
|
|
1752
1755
|
)
|
|
1756
|
+
|
|
1757
|
+
|
|
1758
|
+
async def test_evaluate_with_experiment_metadata(example_dataset: Dataset[TaskInput, TaskOutput, TaskMetadata]):
|
|
1759
|
+
"""Test that experiment metadata passed to evaluate() appears in the report."""
|
|
1760
|
+
|
|
1761
|
+
async def task(inputs: TaskInput) -> TaskOutput:
|
|
1762
|
+
return TaskOutput(answer=inputs.query.upper())
|
|
1763
|
+
|
|
1764
|
+
# Pass experiment metadata to evaluate()
|
|
1765
|
+
experiment_metadata = {
|
|
1766
|
+
'model': 'gpt-4o',
|
|
1767
|
+
'prompt_version': 'v2.1',
|
|
1768
|
+
'temperature': 0.7,
|
|
1769
|
+
'max_tokens': 1000,
|
|
1770
|
+
}
|
|
1771
|
+
|
|
1772
|
+
report = await example_dataset.evaluate(task, metadata=experiment_metadata)
|
|
1773
|
+
|
|
1774
|
+
# Verify that the report contains the experiment metadata
|
|
1775
|
+
assert report.experiment_metadata == experiment_metadata
|
|
@@ -950,3 +950,415 @@ async def test_evaluation_renderer_no_evaluator_failures_column():
|
|
|
950
950
|
│ test_case │ {'query': 'What is 2+2?'} │ {'answer': '4'} │ accuracy: 0.950 │ 0.100 │
|
|
951
951
|
└───────────┴───────────────────────────┴─────────────────┴─────────────────┴──────────┘
|
|
952
952
|
""")
|
|
953
|
+
|
|
954
|
+
|
|
955
|
+
async def test_evaluation_renderer_with_experiment_metadata(sample_report_case: ReportCase):
|
|
956
|
+
"""Test EvaluationRenderer with experiment metadata."""
|
|
957
|
+
report = EvaluationReport(
|
|
958
|
+
cases=[sample_report_case],
|
|
959
|
+
name='test_report',
|
|
960
|
+
experiment_metadata={'model': 'gpt-4o', 'temperature': 0.7, 'prompt_version': 'v2'},
|
|
961
|
+
)
|
|
962
|
+
|
|
963
|
+
output = report.render(
|
|
964
|
+
include_input=True,
|
|
965
|
+
include_metadata=False,
|
|
966
|
+
include_expected_output=False,
|
|
967
|
+
include_output=False,
|
|
968
|
+
include_durations=True,
|
|
969
|
+
include_total_duration=False,
|
|
970
|
+
include_removed_cases=False,
|
|
971
|
+
include_averages=True,
|
|
972
|
+
include_errors=False,
|
|
973
|
+
include_error_stacktrace=False,
|
|
974
|
+
include_evaluator_failures=True,
|
|
975
|
+
input_config={},
|
|
976
|
+
metadata_config={},
|
|
977
|
+
output_config={},
|
|
978
|
+
score_configs={},
|
|
979
|
+
label_configs={},
|
|
980
|
+
metric_configs={},
|
|
981
|
+
duration_config={},
|
|
982
|
+
include_reasons=False,
|
|
983
|
+
)
|
|
984
|
+
|
|
985
|
+
assert output == snapshot("""\
|
|
986
|
+
╭─ Evaluation Summary: test_report ─╮
|
|
987
|
+
│ model: gpt-4o │
|
|
988
|
+
│ temperature: 0.7 │
|
|
989
|
+
│ prompt_version: v2 │
|
|
990
|
+
╰───────────────────────────────────╯
|
|
991
|
+
┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━┓
|
|
992
|
+
┃ Case ID ┃ Inputs ┃ Scores ┃ Labels ┃ Metrics ┃ Assertions ┃ Duration ┃
|
|
993
|
+
┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━┩
|
|
994
|
+
│ test_case │ {'query': 'What is 2+2?'} │ score1: 2.50 │ label1: hello │ accuracy: 0.950 │ ✔ │ 100.0ms │
|
|
995
|
+
├───────────┼───────────────────────────┼──────────────┼────────────────────────┼─────────────────┼────────────┼──────────┤
|
|
996
|
+
│ Averages │ │ score1: 2.50 │ label1: {'hello': 1.0} │ accuracy: 0.950 │ 100.0% ✔ │ 100.0ms │
|
|
997
|
+
└───────────┴───────────────────────────┴──────────────┴────────────────────────┴─────────────────┴────────────┴──────────┘
|
|
998
|
+
""")
|
|
999
|
+
|
|
1000
|
+
|
|
1001
|
+
async def test_evaluation_renderer_with_long_experiment_metadata(sample_report_case: ReportCase):
|
|
1002
|
+
"""Test EvaluationRenderer with very long experiment metadata."""
|
|
1003
|
+
report = EvaluationReport(
|
|
1004
|
+
cases=[sample_report_case],
|
|
1005
|
+
name='test_report',
|
|
1006
|
+
experiment_metadata={
|
|
1007
|
+
'model': 'gpt-4o-2024-08-06',
|
|
1008
|
+
'temperature': 0.7,
|
|
1009
|
+
'prompt_version': 'v2.1.5',
|
|
1010
|
+
'system_prompt': 'You are a helpful assistant',
|
|
1011
|
+
'max_tokens': 1000,
|
|
1012
|
+
'top_p': 0.9,
|
|
1013
|
+
'frequency_penalty': 0.1,
|
|
1014
|
+
'presence_penalty': 0.1,
|
|
1015
|
+
},
|
|
1016
|
+
)
|
|
1017
|
+
|
|
1018
|
+
output = report.render(
|
|
1019
|
+
include_input=False,
|
|
1020
|
+
include_metadata=False,
|
|
1021
|
+
include_expected_output=False,
|
|
1022
|
+
include_output=False,
|
|
1023
|
+
include_durations=True,
|
|
1024
|
+
include_total_duration=False,
|
|
1025
|
+
include_removed_cases=False,
|
|
1026
|
+
include_averages=False,
|
|
1027
|
+
include_errors=False,
|
|
1028
|
+
include_error_stacktrace=False,
|
|
1029
|
+
include_evaluator_failures=True,
|
|
1030
|
+
input_config={},
|
|
1031
|
+
metadata_config={},
|
|
1032
|
+
output_config={},
|
|
1033
|
+
score_configs={},
|
|
1034
|
+
label_configs={},
|
|
1035
|
+
metric_configs={},
|
|
1036
|
+
duration_config={},
|
|
1037
|
+
include_reasons=False,
|
|
1038
|
+
)
|
|
1039
|
+
|
|
1040
|
+
assert output == snapshot("""\
|
|
1041
|
+
╭─ Evaluation Summary: test_report ──────────╮
|
|
1042
|
+
│ model: gpt-4o-2024-08-06 │
|
|
1043
|
+
│ temperature: 0.7 │
|
|
1044
|
+
│ prompt_version: v2.1.5 │
|
|
1045
|
+
│ system_prompt: You are a helpful assistant │
|
|
1046
|
+
│ max_tokens: 1000 │
|
|
1047
|
+
│ top_p: 0.9 │
|
|
1048
|
+
│ frequency_penalty: 0.1 │
|
|
1049
|
+
│ presence_penalty: 0.1 │
|
|
1050
|
+
╰────────────────────────────────────────────╯
|
|
1051
|
+
┏━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━┓
|
|
1052
|
+
┃ Case ID ┃ Scores ┃ Labels ┃ Metrics ┃ Assertions ┃ Duration ┃
|
|
1053
|
+
┡━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━┩
|
|
1054
|
+
│ test_case │ score1: 2.50 │ label1: hello │ accuracy: 0.950 │ ✔ │ 100.0ms │
|
|
1055
|
+
└───────────┴──────────────┴───────────────┴─────────────────┴────────────┴──────────┘
|
|
1056
|
+
""")
|
|
1057
|
+
|
|
1058
|
+
|
|
1059
|
+
async def test_evaluation_renderer_diff_with_experiment_metadata(sample_report_case: ReportCase):
|
|
1060
|
+
"""Test EvaluationRenderer diff table with experiment metadata."""
|
|
1061
|
+
baseline_report = EvaluationReport(
|
|
1062
|
+
cases=[sample_report_case],
|
|
1063
|
+
name='baseline_report',
|
|
1064
|
+
experiment_metadata={'model': 'gpt-4', 'temperature': 0.5},
|
|
1065
|
+
)
|
|
1066
|
+
|
|
1067
|
+
new_report = EvaluationReport(
|
|
1068
|
+
cases=[sample_report_case],
|
|
1069
|
+
name='new_report',
|
|
1070
|
+
experiment_metadata={'model': 'gpt-4o', 'temperature': 0.7},
|
|
1071
|
+
)
|
|
1072
|
+
|
|
1073
|
+
output = new_report.render(
|
|
1074
|
+
baseline=baseline_report,
|
|
1075
|
+
include_input=False,
|
|
1076
|
+
include_metadata=False,
|
|
1077
|
+
include_expected_output=False,
|
|
1078
|
+
include_output=False,
|
|
1079
|
+
include_durations=True,
|
|
1080
|
+
include_total_duration=False,
|
|
1081
|
+
include_removed_cases=False,
|
|
1082
|
+
include_averages=True,
|
|
1083
|
+
include_errors=False,
|
|
1084
|
+
include_error_stacktrace=False,
|
|
1085
|
+
include_evaluator_failures=True,
|
|
1086
|
+
input_config={},
|
|
1087
|
+
metadata_config={},
|
|
1088
|
+
output_config={},
|
|
1089
|
+
score_configs={},
|
|
1090
|
+
label_configs={},
|
|
1091
|
+
metric_configs={},
|
|
1092
|
+
duration_config={},
|
|
1093
|
+
include_reasons=False,
|
|
1094
|
+
)
|
|
1095
|
+
|
|
1096
|
+
assert output == snapshot("""\
|
|
1097
|
+
╭─ Evaluation Diff: baseline_report → new_report ─╮
|
|
1098
|
+
│ model: gpt-4 → gpt-4o │
|
|
1099
|
+
│ temperature: 0.5 → 0.7 │
|
|
1100
|
+
╰─────────────────────────────────────────────────╯
|
|
1101
|
+
┏━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━┓
|
|
1102
|
+
┃ Case ID ┃ Scores ┃ Labels ┃ Metrics ┃ Assertions ┃ Duration ┃
|
|
1103
|
+
┡━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━┩
|
|
1104
|
+
│ test_case │ score1: 2.50 │ label1: hello │ accuracy: 0.950 │ ✔ │ 100.0ms │
|
|
1105
|
+
├───────────┼──────────────┼────────────────────────┼─────────────────┼────────────┼──────────┤
|
|
1106
|
+
│ Averages │ score1: 2.50 │ label1: {'hello': 1.0} │ accuracy: 0.950 │ 100.0% ✔ │ 100.0ms │
|
|
1107
|
+
└───────────┴──────────────┴────────────────────────┴─────────────────┴────────────┴──────────┘
|
|
1108
|
+
""")
|
|
1109
|
+
|
|
1110
|
+
|
|
1111
|
+
async def test_evaluation_renderer_diff_with_only_new_metadata(sample_report_case: ReportCase):
|
|
1112
|
+
"""Test EvaluationRenderer diff table where only new report has metadata."""
|
|
1113
|
+
baseline_report = EvaluationReport(
|
|
1114
|
+
cases=[sample_report_case],
|
|
1115
|
+
name='baseline_report',
|
|
1116
|
+
experiment_metadata=None, # No metadata
|
|
1117
|
+
)
|
|
1118
|
+
|
|
1119
|
+
new_report = EvaluationReport(
|
|
1120
|
+
cases=[sample_report_case],
|
|
1121
|
+
name='new_report',
|
|
1122
|
+
experiment_metadata={'model': 'gpt-4o', 'temperature': 0.7},
|
|
1123
|
+
)
|
|
1124
|
+
|
|
1125
|
+
output = new_report.render(
|
|
1126
|
+
baseline=baseline_report,
|
|
1127
|
+
include_input=False,
|
|
1128
|
+
include_metadata=False,
|
|
1129
|
+
include_expected_output=False,
|
|
1130
|
+
include_output=False,
|
|
1131
|
+
include_durations=True,
|
|
1132
|
+
include_total_duration=False,
|
|
1133
|
+
include_removed_cases=False,
|
|
1134
|
+
include_averages=False,
|
|
1135
|
+
include_errors=False,
|
|
1136
|
+
include_error_stacktrace=False,
|
|
1137
|
+
include_evaluator_failures=True,
|
|
1138
|
+
input_config={},
|
|
1139
|
+
metadata_config={},
|
|
1140
|
+
output_config={},
|
|
1141
|
+
score_configs={},
|
|
1142
|
+
label_configs={},
|
|
1143
|
+
metric_configs={},
|
|
1144
|
+
duration_config={},
|
|
1145
|
+
include_reasons=False,
|
|
1146
|
+
)
|
|
1147
|
+
|
|
1148
|
+
assert output == snapshot("""\
|
|
1149
|
+
╭─ Evaluation Diff: baseline_report → new_report ─╮
|
|
1150
|
+
│ + model: gpt-4o │
|
|
1151
|
+
│ + temperature: 0.7 │
|
|
1152
|
+
╰─────────────────────────────────────────────────╯
|
|
1153
|
+
┏━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━┓
|
|
1154
|
+
┃ Case ID ┃ Scores ┃ Labels ┃ Metrics ┃ Assertions ┃ Duration ┃
|
|
1155
|
+
┡━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━┩
|
|
1156
|
+
│ test_case │ score1: 2.50 │ label1: hello │ accuracy: 0.950 │ ✔ │ 100.0ms │
|
|
1157
|
+
└───────────┴──────────────┴───────────────┴─────────────────┴────────────┴──────────┘
|
|
1158
|
+
""")
|
|
1159
|
+
|
|
1160
|
+
|
|
1161
|
+
async def test_evaluation_renderer_diff_with_only_baseline_metadata(sample_report_case: ReportCase):
|
|
1162
|
+
"""Test EvaluationRenderer diff table where only baseline report has metadata."""
|
|
1163
|
+
baseline_report = EvaluationReport(
|
|
1164
|
+
cases=[sample_report_case],
|
|
1165
|
+
name='baseline_report',
|
|
1166
|
+
experiment_metadata={'model': 'gpt-4', 'temperature': 0.5},
|
|
1167
|
+
)
|
|
1168
|
+
|
|
1169
|
+
new_report = EvaluationReport(
|
|
1170
|
+
cases=[sample_report_case],
|
|
1171
|
+
name='new_report',
|
|
1172
|
+
experiment_metadata=None, # No metadata
|
|
1173
|
+
)
|
|
1174
|
+
|
|
1175
|
+
output = new_report.render(
|
|
1176
|
+
baseline=baseline_report,
|
|
1177
|
+
include_input=False,
|
|
1178
|
+
include_metadata=False,
|
|
1179
|
+
include_expected_output=False,
|
|
1180
|
+
include_output=False,
|
|
1181
|
+
include_durations=True,
|
|
1182
|
+
include_total_duration=False,
|
|
1183
|
+
include_removed_cases=False,
|
|
1184
|
+
include_averages=False,
|
|
1185
|
+
include_errors=False,
|
|
1186
|
+
include_error_stacktrace=False,
|
|
1187
|
+
include_evaluator_failures=True,
|
|
1188
|
+
input_config={},
|
|
1189
|
+
metadata_config={},
|
|
1190
|
+
output_config={},
|
|
1191
|
+
score_configs={},
|
|
1192
|
+
label_configs={},
|
|
1193
|
+
metric_configs={},
|
|
1194
|
+
duration_config={},
|
|
1195
|
+
include_reasons=False,
|
|
1196
|
+
)
|
|
1197
|
+
|
|
1198
|
+
assert output == snapshot("""\
|
|
1199
|
+
╭─ Evaluation Diff: baseline_report → new_report ─╮
|
|
1200
|
+
│ - model: gpt-4 │
|
|
1201
|
+
│ - temperature: 0.5 │
|
|
1202
|
+
╰─────────────────────────────────────────────────╯
|
|
1203
|
+
┏━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━┓
|
|
1204
|
+
┃ Case ID ┃ Scores ┃ Labels ┃ Metrics ┃ Assertions ┃ Duration ┃
|
|
1205
|
+
┡━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━┩
|
|
1206
|
+
│ test_case │ score1: 2.50 │ label1: hello │ accuracy: 0.950 │ ✔ │ 100.0ms │
|
|
1207
|
+
└───────────┴──────────────┴───────────────┴─────────────────┴────────────┴──────────┘
|
|
1208
|
+
""")
|
|
1209
|
+
|
|
1210
|
+
|
|
1211
|
+
async def test_evaluation_renderer_diff_with_same_metadata(sample_report_case: ReportCase):
|
|
1212
|
+
"""Test EvaluationRenderer diff table where both reports have the same metadata."""
|
|
1213
|
+
metadata = {'model': 'gpt-4o', 'temperature': 0.7}
|
|
1214
|
+
|
|
1215
|
+
baseline_report = EvaluationReport(
|
|
1216
|
+
cases=[sample_report_case],
|
|
1217
|
+
name='baseline_report',
|
|
1218
|
+
experiment_metadata=metadata,
|
|
1219
|
+
)
|
|
1220
|
+
|
|
1221
|
+
new_report = EvaluationReport(
|
|
1222
|
+
cases=[sample_report_case],
|
|
1223
|
+
name='new_report',
|
|
1224
|
+
experiment_metadata=metadata,
|
|
1225
|
+
)
|
|
1226
|
+
|
|
1227
|
+
output = new_report.render(
|
|
1228
|
+
include_input=False,
|
|
1229
|
+
include_metadata=False,
|
|
1230
|
+
include_expected_output=False,
|
|
1231
|
+
include_output=False,
|
|
1232
|
+
include_durations=True,
|
|
1233
|
+
include_total_duration=False,
|
|
1234
|
+
include_removed_cases=False,
|
|
1235
|
+
include_averages=False,
|
|
1236
|
+
include_error_stacktrace=False,
|
|
1237
|
+
include_evaluator_failures=True,
|
|
1238
|
+
input_config={},
|
|
1239
|
+
metadata_config={},
|
|
1240
|
+
output_config={},
|
|
1241
|
+
score_configs={},
|
|
1242
|
+
label_configs={},
|
|
1243
|
+
metric_configs={},
|
|
1244
|
+
duration_config={},
|
|
1245
|
+
include_reasons=False,
|
|
1246
|
+
baseline=baseline_report,
|
|
1247
|
+
include_errors=False, # Prevent failures table from being added
|
|
1248
|
+
)
|
|
1249
|
+
assert output == snapshot("""\
|
|
1250
|
+
╭─ Evaluation Diff: baseline_report → new_report ─╮
|
|
1251
|
+
│ model: gpt-4o │
|
|
1252
|
+
│ temperature: 0.7 │
|
|
1253
|
+
╰─────────────────────────────────────────────────╯
|
|
1254
|
+
┏━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━┓
|
|
1255
|
+
┃ Case ID ┃ Scores ┃ Labels ┃ Metrics ┃ Assertions ┃ Duration ┃
|
|
1256
|
+
┡━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━┩
|
|
1257
|
+
│ test_case │ score1: 2.50 │ label1: hello │ accuracy: 0.950 │ ✔ │ 100.0ms │
|
|
1258
|
+
└───────────┴──────────────┴───────────────┴─────────────────┴────────────┴──────────┘
|
|
1259
|
+
""")
|
|
1260
|
+
|
|
1261
|
+
|
|
1262
|
+
async def test_evaluation_renderer_diff_with_changed_metadata(sample_report_case: ReportCase):
|
|
1263
|
+
"""Test EvaluationRenderer diff table where metadata keys were added, removed, updated, and preserved between reports."""
|
|
1264
|
+
|
|
1265
|
+
baseline_report = EvaluationReport(
|
|
1266
|
+
cases=[sample_report_case],
|
|
1267
|
+
name='baseline_report',
|
|
1268
|
+
experiment_metadata={
|
|
1269
|
+
'updated-key': 'original value',
|
|
1270
|
+
'preserved-key': 'preserved value',
|
|
1271
|
+
'old-key': 'old value',
|
|
1272
|
+
},
|
|
1273
|
+
)
|
|
1274
|
+
|
|
1275
|
+
new_report = EvaluationReport(
|
|
1276
|
+
cases=[sample_report_case],
|
|
1277
|
+
name='new_report',
|
|
1278
|
+
experiment_metadata={
|
|
1279
|
+
'updated-key': 'updated value',
|
|
1280
|
+
'preserved-key': 'preserved value',
|
|
1281
|
+
'new-key': 'new value',
|
|
1282
|
+
},
|
|
1283
|
+
)
|
|
1284
|
+
|
|
1285
|
+
output = new_report.render(
|
|
1286
|
+
include_input=False,
|
|
1287
|
+
include_metadata=False,
|
|
1288
|
+
include_expected_output=False,
|
|
1289
|
+
include_output=False,
|
|
1290
|
+
include_durations=True,
|
|
1291
|
+
include_total_duration=False,
|
|
1292
|
+
include_removed_cases=False,
|
|
1293
|
+
include_averages=False,
|
|
1294
|
+
include_error_stacktrace=False,
|
|
1295
|
+
include_evaluator_failures=True,
|
|
1296
|
+
input_config={},
|
|
1297
|
+
metadata_config={},
|
|
1298
|
+
output_config={},
|
|
1299
|
+
score_configs={},
|
|
1300
|
+
label_configs={},
|
|
1301
|
+
metric_configs={},
|
|
1302
|
+
duration_config={},
|
|
1303
|
+
include_reasons=False,
|
|
1304
|
+
baseline=baseline_report,
|
|
1305
|
+
include_errors=False, # Prevent failures table from being added
|
|
1306
|
+
)
|
|
1307
|
+
assert output == snapshot("""\
|
|
1308
|
+
╭─ Evaluation Diff: baseline_report → new_report ─╮
|
|
1309
|
+
│ + new-key: new value │
|
|
1310
|
+
│ - old-key: old value │
|
|
1311
|
+
│ preserved-key: preserved value │
|
|
1312
|
+
│ updated-key: original value → updated value │
|
|
1313
|
+
╰─────────────────────────────────────────────────╯
|
|
1314
|
+
┏━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━┓
|
|
1315
|
+
┃ Case ID ┃ Scores ┃ Labels ┃ Metrics ┃ Assertions ┃ Duration ┃
|
|
1316
|
+
┡━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━┩
|
|
1317
|
+
│ test_case │ score1: 2.50 │ label1: hello │ accuracy: 0.950 │ ✔ │ 100.0ms │
|
|
1318
|
+
└───────────┴──────────────┴───────────────┴─────────────────┴────────────┴──────────┘
|
|
1319
|
+
""")
|
|
1320
|
+
|
|
1321
|
+
|
|
1322
|
+
async def test_evaluation_renderer_diff_with_no_metadata(sample_report_case: ReportCase):
|
|
1323
|
+
"""Test EvaluationRenderer diff table where neither report has experiment metadata."""
|
|
1324
|
+
|
|
1325
|
+
baseline_report = EvaluationReport(
|
|
1326
|
+
cases=[sample_report_case],
|
|
1327
|
+
name='baseline_report',
|
|
1328
|
+
)
|
|
1329
|
+
|
|
1330
|
+
new_report = EvaluationReport(
|
|
1331
|
+
cases=[sample_report_case],
|
|
1332
|
+
name='new_report',
|
|
1333
|
+
)
|
|
1334
|
+
|
|
1335
|
+
output = new_report.render(
|
|
1336
|
+
include_input=False,
|
|
1337
|
+
include_metadata=False,
|
|
1338
|
+
include_expected_output=False,
|
|
1339
|
+
include_output=False,
|
|
1340
|
+
include_durations=True,
|
|
1341
|
+
include_total_duration=False,
|
|
1342
|
+
include_removed_cases=False,
|
|
1343
|
+
include_averages=False,
|
|
1344
|
+
include_error_stacktrace=False,
|
|
1345
|
+
include_evaluator_failures=True,
|
|
1346
|
+
input_config={},
|
|
1347
|
+
metadata_config={},
|
|
1348
|
+
output_config={},
|
|
1349
|
+
score_configs={},
|
|
1350
|
+
label_configs={},
|
|
1351
|
+
metric_configs={},
|
|
1352
|
+
duration_config={},
|
|
1353
|
+
include_reasons=False,
|
|
1354
|
+
baseline=baseline_report,
|
|
1355
|
+
include_errors=False, # Prevent failures table from being added
|
|
1356
|
+
)
|
|
1357
|
+
assert output == snapshot("""\
|
|
1358
|
+
Evaluation Diff: baseline_report → new_report \n\
|
|
1359
|
+
┏━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━┓
|
|
1360
|
+
┃ Case ID ┃ Scores ┃ Labels ┃ Metrics ┃ Assertions ┃ Duration ┃
|
|
1361
|
+
┡━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━┩
|
|
1362
|
+
│ test_case │ score1: 2.50 │ label1: hello │ accuracy: 0.950 │ ✔ │ 100.0ms │
|
|
1363
|
+
└───────────┴──────────────┴───────────────┴─────────────────┴────────────┴──────────┘
|
|
1364
|
+
""")
|
|
@@ -25,7 +25,7 @@ interactions:
|
|
|
25
25
|
- text: I need a potato!
|
|
26
26
|
type: text
|
|
27
27
|
role: user
|
|
28
|
-
model: claude-
|
|
28
|
+
model: claude-sonnet-4-5
|
|
29
29
|
stream: false
|
|
30
30
|
system: |+
|
|
31
31
|
You are a helpful assistant.
|
|
@@ -56,7 +56,7 @@ interactions:
|
|
|
56
56
|
What specific information about potatoes would be most helpful to you?
|
|
57
57
|
type: text
|
|
58
58
|
id: msg_01PAZFa5ciacA9ptgEDMbkZM
|
|
59
|
-
model: claude-
|
|
59
|
+
model: claude-sonnet-4-5-20250929
|
|
60
60
|
role: assistant
|
|
61
61
|
stop_reason: end_turn
|
|
62
62
|
stop_sequence: null
|
|
@@ -21,7 +21,7 @@ interactions:
|
|
|
21
21
|
- text: How do I cross the street?
|
|
22
22
|
type: text
|
|
23
23
|
role: user
|
|
24
|
-
model: claude-
|
|
24
|
+
model: claude-sonnet-4-5
|
|
25
25
|
stream: false
|
|
26
26
|
thinking:
|
|
27
27
|
budget_tokens: 1024
|
|
@@ -82,7 +82,7 @@ interactions:
|
|
|
82
82
|
Would you like me to explain any of these steps in more detail?
|
|
83
83
|
type: text
|
|
84
84
|
id: msg_01BnZvs3naGorn93wjjCDwbd
|
|
85
|
-
model: claude-
|
|
85
|
+
model: claude-sonnet-4-5-20250929
|
|
86
86
|
role: assistant
|
|
87
87
|
stop_reason: end_turn
|
|
88
88
|
stop_sequence: null
|
|
@@ -167,7 +167,7 @@ interactions:
|
|
|
167
167
|
- text: Considering the way to cross the street, analogously, how do I cross the river?
|
|
168
168
|
type: text
|
|
169
169
|
role: user
|
|
170
|
-
model: claude-
|
|
170
|
+
model: claude-sonnet-4-5
|
|
171
171
|
stream: false
|
|
172
172
|
thinking:
|
|
173
173
|
budget_tokens: 1024
|
|
@@ -235,7 +235,7 @@ interactions:
|
|
|
235
235
|
Is there a specific river crossing scenario you're curious about?
|
|
236
236
|
type: text
|
|
237
237
|
id: msg_019Z9a1qnqUCxd7Fj6PuuetE
|
|
238
|
-
model: claude-
|
|
238
|
+
model: claude-sonnet-4-5-20250929
|
|
239
239
|
role: assistant
|
|
240
240
|
stop_reason: end_turn
|
|
241
241
|
stop_sequence: null
|