pydantic-evals 0.4.4__py3-none-any.whl → 0.4.5__py3-none-any.whl
This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
- pydantic_evals/evaluators/llm_as_a_judge.py +45 -52
- {pydantic_evals-0.4.4.dist-info → pydantic_evals-0.4.5.dist-info}/METADATA +2 -2
- {pydantic_evals-0.4.4.dist-info → pydantic_evals-0.4.5.dist-info}/RECORD +5 -5
- {pydantic_evals-0.4.4.dist-info → pydantic_evals-0.4.5.dist-info}/WHEEL +0 -0
- {pydantic_evals-0.4.4.dist-info → pydantic_evals-0.4.5.dist-info}/licenses/LICENSE +0 -0

pydantic_evals/evaluators/llm_as_a_judge.py

@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+from collections.abc import Sequence
 from textwrap import dedent
 from typing import Any
 
@@ -7,6 +8,7 @@ from pydantic import BaseModel, Field
 from pydantic_core import to_json
 
 from pydantic_ai import Agent, models
+from pydantic_ai.messages import MultiModalContentTypes, UserContent
 from pydantic_ai.settings import ModelSettings
 
 __all__ = (
@@ -62,16 +64,7 @@ async def judge_output(
     If the model is not specified, a default model is used. The default model starts as 'openai:gpt-4o',
     but this can be changed using the `set_default_judge_model` function.
     """
-    user_prompt = dedent(
-        f"""
-        <Output>
-        {_stringify(output)}
-        </Output>
-        <Rubric>
-        {rubric}
-        </Rubric>
-        """
-    )
+    user_prompt = _build_prompt(output=output, rubric=rubric)
     return (
         await _judge_output_agent.run(user_prompt, model=model or _default_model, model_settings=model_settings)
     ).output
@@ -112,19 +105,8 @@ async def judge_input_output(
     If the model is not specified, a default model is used. The default model starts as 'openai:gpt-4o',
     but this can be changed using the `set_default_judge_model` function.
     """
-    user_prompt = dedent(
-        f"""
-        <Input>
-        {_stringify(inputs)}
-        </Input>
-        <Output>
-        {_stringify(output)}
-        </Output>
-        <Rubric>
-        {rubric}
-        </Rubric>
-        """
-    )
+    user_prompt = _build_prompt(inputs=inputs, output=output, rubric=rubric)
+
     return (
         await _judge_input_output_agent.run(user_prompt, model=model or _default_model, model_settings=model_settings)
     ).output
@@ -168,22 +150,7 @@ async def judge_input_output_expected(
     If the model is not specified, a default model is used. The default model starts as 'openai:gpt-4o',
     but this can be changed using the `set_default_judge_model` function.
     """
-    user_prompt = dedent(
-        f"""
-        <Input>
-        {_stringify(inputs)}
-        </Input>
-        <ExpectedOutput>
-        {_stringify(expected_output)}
-        </ExpectedOutput>
-        <Output>
-        {_stringify(output)}
-        </Output>
-        <Rubric>
-        {rubric}
-        </Rubric>
-        """
-    )
+    user_prompt = _build_prompt(inputs=inputs, output=output, rubric=rubric, expected_output=expected_output)
 
     return (
         await _judge_input_output_expected_agent.run(
@@ -227,19 +194,7 @@ async def judge_output_expected(
     If the model is not specified, a default model is used. The default model starts as 'openai:gpt-4o',
     but this can be changed using the `set_default_judge_model` function.
     """
-    user_prompt = dedent(
-        f"""
-        <ExpectedOutput>
-        {_stringify(expected_output)}
-        </ExpectedOutput>
-        <Output>
-        {_stringify(output)}
-        </Output>
-        <Rubric>
-        {rubric}
-        </Rubric>
-        """
-    )
+    user_prompt = _build_prompt(output=output, rubric=rubric, expected_output=expected_output)
     return (
         await _judge_output_expected_agent.run(
             user_prompt, model=model or _default_model, model_settings=model_settings
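
The four `judge_*` coroutines above all delegate prompt construction to the shared `_build_prompt` helper introduced in the hunk below. For orientation, a minimal sketch of how these functions are typically driven; the rubric, output text, and fallback model name are illustrative assumptions, and an OpenAI API key must be configured because the default judge model is 'openai:gpt-4o'.

```python
# Hedged sketch, not part of the diff: exercises judge_output(), whose prompt is
# now assembled by _build_prompt() instead of an inline f-string.
import asyncio

from pydantic_evals.evaluators.llm_as_a_judge import judge_output, set_default_judge_model


async def main() -> None:
    # Optionally replace the module-level default judge model mentioned in the
    # docstrings above; any known model name string is accepted.
    set_default_judge_model('openai:gpt-4o-mini')

    grading = await judge_output(
        output='The capital of France is Paris.',
        rubric='The answer names the correct capital city.',
    )
    print(grading.pass_, grading.score, grading.reason)


if __name__ == '__main__':
    asyncio.run(main())
```

`set_default_judge_model` only changes the fallback used when no `model` argument is passed; an explicit `model=` on any `judge_*` call still takes precedence, as the `model or _default_model` expressions above show.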
@@ -265,3 +220,41 @@ def _stringify(value: Any) -> str:
         return to_json(value).decode()
     except Exception:
         return repr(value)
+
+
+def _build_prompt(
+    output: Any,
+    rubric: str,
+    inputs: Any | None = None,
+    expected_output: Any | None = None,
+) -> str | Sequence[str | UserContent]:
+    """Build a prompt that includes input, output, and rubric."""
+    sections: list[str | UserContent] = []
+
+    if inputs is not None:
+        if isinstance(inputs, str):
+            sections.append(f'<Input>\n{inputs}\n</Input>')
+        else:
+            sections.append('<Input>\n')
+            if isinstance(inputs, Sequence):
+                for item in inputs:  # type: ignore
+                    if isinstance(item, (str, MultiModalContentTypes)):
+                        sections.append(item)
+                    else:
+                        sections.append(_stringify(item))
+            elif isinstance(inputs, MultiModalContentTypes):
+                sections.append(inputs)
+            else:
+                sections.append(_stringify(inputs))
+            sections.append('</Input>')
+
+    sections.append(f'<Output>\n{_stringify(output)}\n</Output>')
+    sections.append(f'<Rubric>\n{rubric}\n</Rubric>')
+
+    if expected_output is not None:
+        sections.append(f'<ExpectedOutput>\n{_stringify(expected_output)}\n</ExpectedOutput>')
+
+    if inputs is None or isinstance(inputs, str):
+        return '\n\n'.join(sections)  # type: ignore[arg-type]
+    else:
+        return sections
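
The practical effect of `_build_prompt` is visible in its return type: when `inputs` is a plain string (or absent) it still returns one joined string of `<Input>`, `<Output>`, and `<Rubric>` sections, but when `inputs` is a sequence or multimodal content it returns a list of text and `UserContent` parts, so images and other attachments reach the judge agent intact instead of being collapsed to JSON text. A hedged sketch of that multimodal path; the question, image URL, and rubric are placeholders rather than values from the package, and an OpenAI API key is assumed.

```python
# Hedged sketch, not part of the diff: passes a mixed text/image `inputs` value,
# which the new _build_prompt() forwards as a sequence of UserContent parts.
import asyncio

from pydantic_ai.messages import ImageUrl

from pydantic_evals.evaluators.llm_as_a_judge import judge_input_output


async def main() -> None:
    grading = await judge_input_output(
        inputs=[
            'What animal is shown in this picture?',
            ImageUrl(url='https://example.com/photo.jpg'),  # placeholder URL
        ],
        output='It is a cat.',
        rubric='The answer correctly identifies the animal in the image.',
    )
    print(grading.pass_, grading.reason)


if __name__ == '__main__':
    asyncio.run(main())
```

String-only callers are unaffected in substance: for them `_build_prompt` still produces a single tagged prompt string, though the `<ExpectedOutput>` section now follows `<Rubric>` and the indentation differs from the removed f-strings.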

{pydantic_evals-0.4.4.dist-info → pydantic_evals-0.4.5.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pydantic-evals
-Version: 0.4.4
+Version: 0.4.5
 Summary: Framework for evaluating stochastic code execution, especially code making use of LLMs
 Project-URL: Homepage, https://ai.pydantic.dev/evals
 Project-URL: Source, https://github.com/pydantic/pydantic-ai
@@ -32,7 +32,7 @@ Requires-Python: >=3.9
 Requires-Dist: anyio>=0
 Requires-Dist: eval-type-backport>=0; python_version < '3.11'
 Requires-Dist: logfire-api>=1.2.0
-Requires-Dist: pydantic-ai-slim==0.4.4
+Requires-Dist: pydantic-ai-slim==0.4.5
 Requires-Dist: pydantic>=2.10
 Requires-Dist: pyyaml>=6.0.2
 Requires-Dist: rich>=13.9.4

{pydantic_evals-0.4.4.dist-info → pydantic_evals-0.4.5.dist-info}/RECORD

@@ -9,7 +9,7 @@ pydantic_evals/evaluators/_spec.py,sha256=Xi_FHwnmAZ1x2hoJFw4MBZuG0TilNKqMRW3P74
 pydantic_evals/evaluators/common.py,sha256=ZBrNTfPJoOpT4WNXTRGS0UcKhnuhfYJxjNzum-zHFk8,12064
 pydantic_evals/evaluators/context.py,sha256=8osQRCFW8Vekw7JiiOOCHHH3HOGdhDaUlr8i-twSetg,3870
 pydantic_evals/evaluators/evaluator.py,sha256=yOEKLOxElm7_4tLcq6_myXI0e4Ei9svZP9y5DTq4SYI,11147
-pydantic_evals/evaluators/llm_as_a_judge.py,sha256=
+pydantic_evals/evaluators/llm_as_a_judge.py,sha256=xQjaGuCRXZdlExacFyR4Y4kFmwBh2QxAfEyaed_aqvk,9615
 pydantic_evals/otel/__init__.py,sha256=i2p3vDrOW039N4XM-UkozDhCm0ZmE6ZSs1yV5t03vd0,117
 pydantic_evals/otel/_context_in_memory_span_exporter.py,sha256=vIDF9-6lDuNKZuSM5hN_R8VRK4jzmdfe1DgWdXwxVbc,6758
 pydantic_evals/otel/_context_subtree.py,sha256=Iazp4w3IIBMCrkqWL-hTG-2QG_-2X81p794WG9MAsGk,1175
@@ -17,7 +17,7 @@ pydantic_evals/otel/_errors.py,sha256=aW1414eTofpA7R_DUgOeT-gj7YA6OXmm8Y4oYeFukD
 pydantic_evals/otel/span_tree.py,sha256=LV5Hsyo4riJzevHyBz8wxP82S-ry5zeKYi9bKWjGCS8,23057
 pydantic_evals/reporting/__init__.py,sha256=k_3tteqXGh0yGvgpN68gB0CjG9wzrakzDTve2GHend4,42148
 pydantic_evals/reporting/render_numbers.py,sha256=8SKlK3etbD7HnSWWHCE993ceCNLZCepVQ-SsqUIhyxk,6916
-pydantic_evals-0.4.
-pydantic_evals-0.4.
-pydantic_evals-0.4.
-pydantic_evals-0.4.
+pydantic_evals-0.4.5.dist-info/METADATA,sha256=6axvUhXnQNjpGjtEIsUv2FHDNQnOQeBMEZmaqzfoo_s,7938
+pydantic_evals-0.4.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+pydantic_evals-0.4.5.dist-info/licenses/LICENSE,sha256=vA6Jc482lEyBBuGUfD1pYx-cM7jxvLYOxPidZ30t_PQ,1100
+pydantic_evals-0.4.5.dist-info/RECORD,,

{pydantic_evals-0.4.4.dist-info → pydantic_evals-0.4.5.dist-info}/WHEEL

File without changes

{pydantic_evals-0.4.4.dist-info → pydantic_evals-0.4.5.dist-info}/licenses/LICENSE

File without changes