pydantic-ai 0.0.51__tar.gz → 0.0.53__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/.gitignore +1 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/Makefile +5 -1
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/PKG-INFO +3 -3
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/evals/test_dataset.py +2 -2
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/evals/test_evaluator_common.py +2 -2
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/evals/test_evaluators.py +33 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/test_gemini.py +24 -19
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/test_openai.py +497 -2
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/providers/test_google_vertex.py +3 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/test_logfire.py +1 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/test_tools.py +16 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/test_utils.py +43 -11
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/LICENSE +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/README.md +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/pyproject.toml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/__init__.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/assets/dummy.pdf +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/assets/kiwi.png +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/assets/marcelo.mp3 +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/cassettes/test_mcp/test_agent_with_stdio_server.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/conftest.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/evals/__init__.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/evals/test_evaluator_base.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/evals/test_evaluator_context.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/evals/test_evaluator_spec.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/evals/test_llm_as_a_judge.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/evals/test_otel.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/evals/test_render_numbers.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/evals/test_reporting.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/evals/test_reports.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/evals/test_utils.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/evals/utils.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/example_modules/README.md +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/example_modules/bank_database.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/example_modules/fake_database.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/example_modules/weather_service.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/graph/__init__.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/graph/test_file_persistence.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/graph/test_graph.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/graph/test_mermaid.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/graph/test_persistence.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/graph/test_state.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/graph/test_utils.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/import_examples.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/json_body_serializer.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/mcp_server.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/__init__.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_anthropic/test_document_binary_content_input.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_anthropic/test_document_url_input.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_anthropic/test_image_url_input.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_anthropic/test_image_url_input_invalid_mime_type.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_anthropic/test_multiple_parallel_tool_calls.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_anthropic/test_text_document_url_input.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_bedrock/test_bedrock_model.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_bedrock/test_bedrock_model_anthropic_model_without_tools.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_bedrock/test_bedrock_model_iter_stream.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_bedrock/test_bedrock_model_max_tokens.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_bedrock/test_bedrock_model_retry.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_bedrock/test_bedrock_model_stream.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_bedrock/test_bedrock_model_structured_response.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_bedrock/test_bedrock_model_top_p.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_bedrock/test_document_url_input.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_bedrock/test_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_bedrock/test_image_url_input.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_bedrock/test_text_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_bedrock/test_text_document_url_input.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_cohere/test_request_simple_success_with_vcr.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_gemini/test_document_url_input.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_gemini/test_gemini_drop_exclusive_maximum.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_gemini/test_gemini_exclusive_minimum_and_maximum.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_gemini/test_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_gemini/test_image_url_input.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_groq/test_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_groq/test_image_url_input.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_openai/test_audio_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_openai/test_document_url_input.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_openai/test_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_openai/test_max_completion_tokens[gpt-4.5-preview].yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_openai/test_max_completion_tokens[gpt-4o-mini].yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_openai/test_max_completion_tokens[o3-mini].yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_openai/test_multiple_agent_tool_calls.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_openai/test_openai_o1_mini_system_role[developer].yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_openai/test_openai_o1_mini_system_role[system].yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_openai/test_user_id.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_openai_responses/test_audio_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_openai_responses/test_image_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_openai_responses/test_openai_responses_document_as_binary_content_input.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_openai_responses/test_openai_responses_document_url_input.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_openai_responses/test_openai_responses_image_url_input.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_builtin_tools.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_http_error.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_retry.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_simple_response.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_openai_responses/test_openai_responses_model_simple_response_with_tool_call.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_openai_responses/test_openai_responses_reasoning_effort.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_openai_responses/test_openai_responses_reasoning_generate_summary.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_openai_responses/test_openai_responses_result_type.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_openai_responses/test_openai_responses_stream.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_openai_responses/test_openai_responses_system_prompt.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/cassettes/test_openai_responses/test_openai_responses_text_document_url_input.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/mock_async_stream.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/test_anthropic.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/test_bedrock.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/test_cohere.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/test_fallback.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/test_groq.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/test_instrumented.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/test_mistral.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/test_model.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/test_model_function.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/test_model_names.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/test_model_test.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/models/test_openai_responses.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/providers/__init__.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/providers/cassettes/test_azure/test_azure_provider_call.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/providers/cassettes/test_google_vertex/test_vertexai_provider.yaml +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/providers/test_anthropic.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/providers/test_azure.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/providers/test_bedrock.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/providers/test_cohere.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/providers/test_deepseek.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/providers/test_google_gla.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/providers/test_groq.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/providers/test_mistral.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/providers/test_openai.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/providers/test_provider_names.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/test_agent.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/test_cli.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/test_deps.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/test_examples.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/test_format_as_xml.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/test_json_body_serializer.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/test_live.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/test_mcp.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/test_messages.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/test_parts_manager.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/test_settings.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/test_streaming.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/test_usage_limits.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/typed_agent.py +0 -0
- {pydantic_ai-0.0.51 → pydantic_ai-0.0.53}/tests/typed_graph.py +0 -0
|
@@ -8,8 +8,12 @@
|
|
|
8
8
|
.pre-commit: ## Check that pre-commit is installed
|
|
9
9
|
@pre-commit -V || echo 'Please install pre-commit: https://pre-commit.com/'
|
|
10
10
|
|
|
11
|
+
.PHONY: .deno
|
|
12
|
+
.deno: ## Check that deno is installed
|
|
13
|
+
@deno --version > /dev/null 2>&1 || (printf "\033[0;31m✖ Error: deno is not installed, but is needed for mcp-run-python\033[0m\n Please install deno: https://docs.deno.com/runtime/getting_started/installation/\n" && exit 1)
|
|
14
|
+
|
|
11
15
|
.PHONY: install
|
|
12
|
-
install: .uv .pre-commit ## Install the package, dependencies, and pre-commit for local development
|
|
16
|
+
install: .uv .pre-commit .deno ## Install the package, dependencies, and pre-commit for local development
|
|
13
17
|
uv sync --frozen --all-extras --all-packages --group lint --group docs
|
|
14
18
|
pre-commit install --install-hooks
|
|
15
19
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pydantic-ai
|
|
3
|
-
Version: 0.0.51
|
|
3
|
+
Version: 0.0.53
|
|
4
4
|
Summary: Agent Framework / shim to use Pydantic with LLMs
|
|
5
5
|
Project-URL: Homepage, https://ai.pydantic.dev
|
|
6
6
|
Project-URL: Source, https://github.com/pydantic/pydantic-ai
|
|
@@ -28,9 +28,9 @@ Classifier: Topic :: Internet
|
|
|
28
28
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
29
29
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
30
30
|
Requires-Python: >=3.9
|
|
31
|
-
Requires-Dist: pydantic-ai-slim[anthropic,bedrock,cli,cohere,evals,groq,mcp,mistral,openai,vertexai]==0.0.51
|
|
31
|
+
Requires-Dist: pydantic-ai-slim[anthropic,bedrock,cli,cohere,evals,groq,mcp,mistral,openai,vertexai]==0.0.53
|
|
32
32
|
Provides-Extra: examples
|
|
33
|
-
Requires-Dist: pydantic-ai-examples==0.0.51; extra == 'examples'
|
|
33
|
+
Requires-Dist: pydantic-ai-examples==0.0.53; extra == 'examples'
|
|
34
34
|
Provides-Extra: logfire
|
|
35
35
|
Requires-Dist: logfire>=3.11.0; extra == 'logfire'
|
|
36
36
|
Description-Content-Type: text/markdown
|
|
@@ -611,10 +611,10 @@ async def test_from_text_failure():
|
|
|
611
611
|
'2 error(s) loading evaluators from registry',
|
|
612
612
|
[
|
|
613
613
|
ValueError(
|
|
614
|
-
"Evaluator 'NotAnEvaluator' is not in the provided
|
|
614
|
+
"Evaluator 'NotAnEvaluator' is not in the provided `custom_evaluator_types`. Valid choices: ['Equals', 'EqualsExpected', 'Contains', 'IsInstance', 'MaxDuration', 'LLMJudge', 'HasMatchingSpan']. If you are trying to use a custom evaluator, you must include its type in the `custom_evaluator_types` argument."
|
|
615
615
|
),
|
|
616
616
|
ValueError(
|
|
617
|
-
"Evaluator 'NotAnEvaluator' is not in the provided
|
|
617
|
+
"Evaluator 'NotAnEvaluator' is not in the provided `custom_evaluator_types`. Valid choices: ['Equals', 'EqualsExpected', 'Contains', 'IsInstance', 'MaxDuration', 'LLMJudge', 'HasMatchingSpan']. If you are trying to use a custom evaluator, you must include its type in the `custom_evaluator_types` argument."
|
|
618
618
|
),
|
|
619
619
|
],
|
|
620
620
|
)
|
|
@@ -222,10 +222,10 @@ async def test_llm_judge_evaluator(mocker: MockerFixture):
|
|
|
222
222
|
assert result.value is True
|
|
223
223
|
assert result.reason == 'Test passed'
|
|
224
224
|
|
|
225
|
-
mock_judge_output.assert_called_once_with('Hello world', 'Content contains a greeting',
|
|
225
|
+
mock_judge_output.assert_called_once_with('Hello world', 'Content contains a greeting', None)
|
|
226
226
|
|
|
227
227
|
# Test with input
|
|
228
|
-
evaluator = LLMJudge(rubric='Output contains input', include_input=True)
|
|
228
|
+
evaluator = LLMJudge(rubric='Output contains input', include_input=True, model='openai:gpt-4o')
|
|
229
229
|
result = await evaluator.evaluate(ctx)
|
|
230
230
|
assert isinstance(result, EvaluationReason)
|
|
231
231
|
assert result.value is True
|
|
@@ -7,6 +7,11 @@ import pytest
|
|
|
7
7
|
from inline_snapshot import snapshot
|
|
8
8
|
from pydantic import BaseModel, TypeAdapter
|
|
9
9
|
|
|
10
|
+
from pydantic_ai.messages import ModelMessage, ModelResponse
|
|
11
|
+
from pydantic_ai.models import Model, ModelRequestParameters
|
|
12
|
+
from pydantic_ai.settings import ModelSettings
|
|
13
|
+
from pydantic_ai.usage import Usage
|
|
14
|
+
|
|
10
15
|
from ..conftest import try_import
|
|
11
16
|
|
|
12
17
|
with try_import() as imports_successful:
|
|
@@ -108,6 +113,34 @@ async def test_evaluator_spec_serialization():
|
|
|
108
113
|
assert adapter.dump_python(spec_single_arg, context={'use_short_form': True}) == snapshot({'MyEvaluator': 'value1'})
|
|
109
114
|
|
|
110
115
|
|
|
116
|
+
async def test_llm_judge_serialization():
|
|
117
|
+
# Ensure models are serialized based on their system + name when used with LLMJudge
|
|
118
|
+
|
|
119
|
+
class MyModel(Model):
|
|
120
|
+
async def request(
|
|
121
|
+
self,
|
|
122
|
+
messages: list[ModelMessage],
|
|
123
|
+
model_settings: ModelSettings | None,
|
|
124
|
+
model_request_parameters: ModelRequestParameters,
|
|
125
|
+
) -> tuple[ModelResponse, Usage]:
|
|
126
|
+
raise NotImplementedError
|
|
127
|
+
|
|
128
|
+
@property
|
|
129
|
+
def model_name(self) -> str:
|
|
130
|
+
return 'my-model'
|
|
131
|
+
|
|
132
|
+
@property
|
|
133
|
+
def system(self) -> str:
|
|
134
|
+
return 'my-system'
|
|
135
|
+
|
|
136
|
+
adapter = TypeAdapter(Evaluator)
|
|
137
|
+
|
|
138
|
+
assert adapter.dump_python(LLMJudge(rubric='my rubric', model=MyModel())) == {
|
|
139
|
+
'name': 'LLMJudge',
|
|
140
|
+
'arguments': {'model': 'my-system:my-model', 'rubric': 'my rubric'},
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
|
|
111
144
|
async def test_evaluator_call(test_context: EvaluatorContext[TaskInput, TaskOutput, TaskMetadata]):
|
|
112
145
|
"""Test calling an Evaluator."""
|
|
113
146
|
|
|
@@ -61,9 +61,10 @@ async def test_model_simple(allow_model_requests: None):
|
|
|
61
61
|
assert m.model_name == 'gemini-1.5-flash'
|
|
62
62
|
assert 'x-goog-api-key' in m.client.headers
|
|
63
63
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
64
|
+
mrp = ModelRequestParameters(function_tools=[], allow_text_result=True, result_tools=[])
|
|
65
|
+
mrp = m.customize_request_parameters(mrp)
|
|
66
|
+
tools = m._get_tools(mrp)
|
|
67
|
+
tool_config = m._get_tool_config(mrp, tools)
|
|
67
68
|
assert tools is None
|
|
68
69
|
assert tool_config is None
|
|
69
70
|
|
|
@@ -93,9 +94,10 @@ async def test_model_tools(allow_model_requests: None):
|
|
|
93
94
|
{'type': 'object', 'title': 'Result', 'properties': {'spam': {'type': 'number'}}, 'required': ['spam']},
|
|
94
95
|
)
|
|
95
96
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
97
|
+
mrp = ModelRequestParameters(function_tools=tools, allow_text_result=True, result_tools=[result_tool])
|
|
98
|
+
mrp = m.customize_request_parameters(mrp)
|
|
99
|
+
tools = m._get_tools(mrp)
|
|
100
|
+
tool_config = m._get_tool_config(mrp, tools)
|
|
99
101
|
assert tools == snapshot(
|
|
100
102
|
_GeminiTools(
|
|
101
103
|
function_declarations=[
|
|
@@ -134,9 +136,10 @@ async def test_require_response_tool(allow_model_requests: None):
|
|
|
134
136
|
'This is the tool for the final Result',
|
|
135
137
|
{'type': 'object', 'title': 'Result', 'properties': {'spam': {'type': 'number'}}},
|
|
136
138
|
)
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
139
|
+
mrp = ModelRequestParameters(function_tools=[], allow_text_result=False, result_tools=[result_tool])
|
|
140
|
+
mrp = m.customize_request_parameters(mrp)
|
|
141
|
+
tools = m._get_tools(mrp)
|
|
142
|
+
tool_config = m._get_tool_config(mrp, tools)
|
|
140
143
|
assert tools == snapshot(
|
|
141
144
|
_GeminiTools(
|
|
142
145
|
function_declarations=[
|
|
@@ -193,9 +196,9 @@ async def test_json_def_replaced(allow_model_requests: None):
|
|
|
193
196
|
'This is the tool for the final Result',
|
|
194
197
|
json_schema,
|
|
195
198
|
)
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
) == snapshot(
|
|
199
|
+
mrp = ModelRequestParameters(function_tools=[], allow_text_result=True, result_tools=[result_tool])
|
|
200
|
+
mrp = m.customize_request_parameters(mrp)
|
|
201
|
+
assert m._get_tools(mrp) == snapshot(
|
|
199
202
|
_GeminiTools(
|
|
200
203
|
function_declarations=[
|
|
201
204
|
_GeminiFunction(
|
|
@@ -240,9 +243,9 @@ async def test_json_def_replaced_any_of(allow_model_requests: None):
|
|
|
240
243
|
'This is the tool for the final Result',
|
|
241
244
|
json_schema,
|
|
242
245
|
)
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
) == snapshot(
|
|
246
|
+
mrp = ModelRequestParameters(function_tools=[], allow_text_result=True, result_tools=[result_tool])
|
|
247
|
+
mrp = m.customize_request_parameters(mrp)
|
|
248
|
+
assert m._get_tools(mrp) == snapshot(
|
|
246
249
|
_GeminiTools(
|
|
247
250
|
function_declarations=[
|
|
248
251
|
_GeminiFunction(
|
|
@@ -304,7 +307,9 @@ async def test_json_def_recursive(allow_model_requests: None):
|
|
|
304
307
|
json_schema,
|
|
305
308
|
)
|
|
306
309
|
with pytest.raises(UserError, match=r'Recursive `\$ref`s in JSON Schema are not supported by Gemini'):
|
|
307
|
-
|
|
310
|
+
mrp = ModelRequestParameters(function_tools=[], allow_text_result=True, result_tools=[result_tool])
|
|
311
|
+
mrp = m.customize_request_parameters(mrp)
|
|
312
|
+
m._get_tools(mrp)
|
|
308
313
|
|
|
309
314
|
|
|
310
315
|
async def test_json_def_date(allow_model_requests: None):
|
|
@@ -335,9 +340,9 @@ async def test_json_def_date(allow_model_requests: None):
|
|
|
335
340
|
'This is the tool for the final Result',
|
|
336
341
|
json_schema,
|
|
337
342
|
)
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
) == snapshot(
|
|
343
|
+
mrp = ModelRequestParameters(function_tools=[], allow_text_result=True, result_tools=[result_tool])
|
|
344
|
+
mrp = m.customize_request_parameters(mrp)
|
|
345
|
+
assert m._get_tools(mrp) == snapshot(
|
|
341
346
|
_GeminiTools(
|
|
342
347
|
function_declarations=[
|
|
343
348
|
_GeminiFunction(
|