deepeval-3.4.8-py3-none-any.whl → deepeval-3.5.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/__init__.py +8 -5
- deepeval/_version.py +1 -1
- deepeval/benchmarks/drop/drop.py +2 -3
- deepeval/benchmarks/hellaswag/hellaswag.py +2 -2
- deepeval/benchmarks/logi_qa/logi_qa.py +2 -2
- deepeval/benchmarks/math_qa/math_qa.py +2 -2
- deepeval/benchmarks/mmlu/mmlu.py +2 -2
- deepeval/benchmarks/truthful_qa/truthful_qa.py +2 -2
- deepeval/cli/main.py +561 -727
- deepeval/confident/api.py +30 -14
- deepeval/config/__init__.py +0 -0
- deepeval/config/settings.py +565 -0
- deepeval/config/settings_manager.py +133 -0
- deepeval/config/utils.py +86 -0
- deepeval/dataset/__init__.py +1 -0
- deepeval/dataset/dataset.py +70 -10
- deepeval/dataset/test_run_tracer.py +82 -0
- deepeval/dataset/utils.py +23 -0
- deepeval/integrations/pydantic_ai/__init__.py +2 -4
- deepeval/integrations/pydantic_ai/{setup.py → otel.py} +0 -8
- deepeval/integrations/pydantic_ai/patcher.py +376 -0
- deepeval/key_handler.py +1 -0
- deepeval/metrics/answer_relevancy/template.py +7 -2
- deepeval/metrics/faithfulness/template.py +11 -8
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +6 -4
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +6 -4
- deepeval/metrics/tool_correctness/tool_correctness.py +7 -3
- deepeval/models/llms/amazon_bedrock_model.py +24 -3
- deepeval/models/llms/grok_model.py +1 -1
- deepeval/models/llms/kimi_model.py +1 -1
- deepeval/models/llms/openai_model.py +37 -41
- deepeval/models/retry_policy.py +280 -0
- deepeval/openai_agents/agent.py +4 -2
- deepeval/test_run/api.py +1 -0
- deepeval/tracing/otel/exporter.py +20 -8
- deepeval/tracing/otel/utils.py +57 -0
- deepeval/tracing/perf_epoch_bridge.py +4 -4
- deepeval/tracing/tracing.py +37 -16
- deepeval/tracing/utils.py +98 -1
- deepeval/utils.py +111 -70
- {deepeval-3.4.8.dist-info → deepeval-3.5.0.dist-info}/METADATA +16 -13
- {deepeval-3.4.8.dist-info → deepeval-3.5.0.dist-info}/RECORD +45 -40
- deepeval/env.py +0 -35
- deepeval/integrations/pydantic_ai/agent.py +0 -364
- {deepeval-3.4.8.dist-info → deepeval-3.5.0.dist-info}/LICENSE.md +0 -0
- {deepeval-3.4.8.dist-info → deepeval-3.5.0.dist-info}/WHEEL +0 -0
- {deepeval-3.4.8.dist-info → deepeval-3.5.0.dist-info}/entry_points.txt +0 -0
deepeval/integrations/pydantic_ai/patcher.py
ADDED
@@ -0,0 +1,376 @@
+import functools
+import deepeval
+from deepeval.tracing.types import LlmOutput, LlmToolCall
+from pydantic_ai.agent import AgentRunResult
+from deepeval.tracing.context import current_trace_context
+from deepeval.tracing.types import AgentSpan, LlmSpan
+from deepeval.tracing.tracing import Observer
+from typing import List, Callable, Optional, Any
+from deepeval.test_case.llm_test_case import ToolCall
+from deepeval.metrics.base_metric import BaseMetric
+from deepeval.confident.api import get_confident_api_key
+from deepeval.integrations.pydantic_ai.otel import instrument_pydantic_ai
+from deepeval.telemetry import capture_tracing_integration
+from deepeval.prompt import Prompt
+
+try:
+    from pydantic_ai.agent import Agent
+    from pydantic_ai.models import Model
+    from pydantic_ai.messages import (
+        ModelResponse,
+        ModelRequest,
+        ModelResponsePart,
+        TextPart,
+        ToolCallPart,
+        SystemPromptPart,
+        ToolReturnPart,
+        UserPromptPart,
+    )
+
+    pydantic_ai_installed = True
+except:
+    pydantic_ai_installed = True
+
+
+def _patch_agent_tool_decorator():
+    original_tool = Agent.tool
+
+    @functools.wraps(original_tool)
+    def wrapper(
+        *args,
+        metrics: Optional[List[BaseMetric]] = None,
+        metric_collection: Optional[str] = None,
+        **kwargs
+    ):
+        # Case 1: Direct decoration - @agent.tool
+        if args and callable(args[0]):
+            patched_func = _create_patched_tool(
+                args[0], metrics, metric_collection
+            )
+            new_args = (patched_func,) + args[1:]
+            return original_tool(*new_args, **kwargs)
+
+        # Case 2: Decoration with arguments - @agent.tool(metrics=..., metric_collection=...)
+        else:
+            # Return a decorator function that will receive the actual function
+            def decorator(func):
+                patched_func = _create_patched_tool(
+                    func, metrics, metric_collection
+                )
+                return original_tool(*args, **kwargs)(patched_func)
+
+            return decorator
+
+    Agent.tool = wrapper
+
+
+def _create_patched_tool(
+    func: Callable,
+    metrics: Optional[List[BaseMetric]] = None,
+    metric_collection: Optional[str] = None,
+):
+    import asyncio
+
+    original_func = func
+
+    is_async = asyncio.iscoroutinefunction(original_func)
+
+    if is_async:
+
+        @functools.wraps(original_func)
+        async def async_wrapper(*args, **kwargs):
+            with Observer(
+                span_type="tool",
+                func_name=original_func.__name__,
+                metrics=metrics,
+                metric_collection=metric_collection,
+                function_kwargs={"args": args, **kwargs},
+            ) as observer:
+                result = await original_func(*args, **kwargs)
+                observer.result = result
+
+            return result
+
+        return async_wrapper
+    else:
+
+        @functools.wraps(original_func)
+        def sync_wrapper(*args, **kwargs):
+            with Observer(
+                span_type="tool",
+                func_name=original_func.__name__,
+                metrics=metrics,
+                metric_collection=metric_collection,
+                function_kwargs={"args": args, **kwargs},
+            ) as observer:
+                result = original_func(*args, **kwargs)
+                observer.result = result
+
+            return result
+
+        return sync_wrapper
+
+
+def _patch_agent_init():
+    original_init = Agent.__init__
+
+    @functools.wraps(original_init)
+    def wrapper(
+        self,
+        *args,
+        llm_metric_collection: Optional[str] = None,
+        llm_metrics: Optional[List[BaseMetric]] = None,
+        llm_prompt: Optional[Prompt] = None,
+        agent_metric_collection: Optional[str] = None,
+        agent_metrics: Optional[List[BaseMetric]] = None,
+        **kwargs
+    ):
+        result = original_init(self, *args, **kwargs)
+        _patch_llm_model(
+            self._model, llm_metric_collection, llm_metrics, llm_prompt
+        )  # runtime patch of the model
+        _patch_agent_run(agent_metric_collection, agent_metrics)
+        return result
+
+    Agent.__init__ = wrapper
+
+
+def _patch_agent_run(
+    agent_metric_collection: Optional[str] = None,
+    agent_metrics: Optional[List[BaseMetric]] = None,
+):
+    original_run = Agent.run
+
+    @functools.wraps(original_run)
+    async def wrapper(
+        *args,
+        trace_metric_collection: Optional[str] = None,
+        trace_metrics: Optional[List[BaseMetric]] = None,
+        trace_name: Optional[str] = None,
+        trace_tags: Optional[List[str]] = None,
+        trace_metadata: Optional[dict] = None,
+        trace_thread_id: Optional[str] = None,
+        trace_user_id: Optional[str] = None,
+        **kwargs
+    ):
+        with Observer(
+            span_type="agent",
+            func_name="Agent",
+            function_kwargs={"input": args[1]},
+            metrics=agent_metrics,
+            metric_collection=agent_metric_collection,
+        ) as observer:
+            result = await original_run(*args, **kwargs)
+            observer.update_span_properties = (
+                lambda agent_span: set_agent_span_attributes(agent_span, result)
+            )
+            observer.result = result.output
+
+            _update_trace_context(
+                trace_name=trace_name,
+                trace_tags=trace_tags,
+                trace_metadata=trace_metadata,
+                trace_thread_id=trace_thread_id,
+                trace_user_id=trace_user_id,
+                trace_metric_collection=trace_metric_collection,
+                trace_metrics=trace_metrics,
+                trace_input=args[1],
+                trace_output=result.output,
+            )
+
+        return result
+
+    Agent.run = wrapper
+
+
+def _update_trace_context(
+    trace_name: Optional[str] = None,
+    trace_tags: Optional[List[str]] = None,
+    trace_metadata: Optional[dict] = None,
+    trace_thread_id: Optional[str] = None,
+    trace_user_id: Optional[str] = None,
+    trace_metric_collection: Optional[str] = None,
+    trace_metrics: Optional[List[BaseMetric]] = None,
+    trace_input: Optional[Any] = None,
+    trace_output: Optional[Any] = None,
+):
+
+    current_trace = current_trace_context.get()
+    current_trace.name = trace_name
+    current_trace.tags = trace_tags
+    current_trace.metadata = trace_metadata
+    current_trace.thread_id = trace_thread_id
+    current_trace.user_id = trace_user_id
+    current_trace.metric_collection = trace_metric_collection
+    current_trace.metrics = trace_metrics
+    current_trace.input = trace_input
+    current_trace.output = trace_output
+
+
+def _patch_llm_model(
+    model: Model,
+    llm_metric_collection: Optional[str] = None,
+    llm_metrics: Optional[List[BaseMetric]] = None,
+    llm_prompt: Optional[Prompt] = None,
+):
+    original_func = model.request
+    try:
+        model_name = model.model_name
+    except Exception:
+        model_name = "unknown"
+
+    @functools.wraps(original_func)
+    async def wrapper(*args, **kwargs):
+        with Observer(
+            span_type="llm",
+            func_name="LLM",
+            observe_kwargs={"model": model_name},
+            metrics=llm_metrics,
+            metric_collection=llm_metric_collection,
+        ) as observer:
+            result = await original_func(*args, **kwargs)
+            request = kwargs.get("messages", [])
+            if not request:
+                request = args[0]
+            observer.update_span_properties = (
+                lambda llm_span: set_llm_span_attributes(
+                    llm_span, args[0], result, llm_prompt
+                )
+            )
+            observer.result = result
+        return result
+
+    model.request = wrapper
+
+
+def instrument(otel: Optional[bool] = False, api_key: Optional[str] = None):
+
+    if api_key:
+        deepeval.login(api_key)
+
+    api_key = get_confident_api_key()
+
+    if not api_key:
+        raise ValueError("No api key provided.")
+
+    if otel:
+        instrument_pydantic_ai(api_key)
+    else:
+        with capture_tracing_integration("pydantic_ai"):
+            _patch_agent_init()
+            _patch_agent_tool_decorator()
+
+
+def set_llm_span_attributes(
+    llm_span: LlmSpan,
+    requests: List[ModelRequest],
+    result: ModelResponse,
+    llm_prompt: Optional[Prompt] = None,
+):
+    llm_span.prompt = llm_prompt
+
+    input = []
+    for request in requests:
+        for part in request.parts:
+            if isinstance(part, SystemPromptPart):
+                input.append({"role": "System", "content": part.content})
+            elif isinstance(part, UserPromptPart):
+                input.append({"role": "User", "content": part.content})
+            elif isinstance(part, ToolCallPart):
+                input.append(
+                    {
+                        "role": "Tool Call",
+                        "name": part.tool_name,
+                        "content": part.args_as_json_str(),
+                    }
+                )
+            elif isinstance(part, ToolReturnPart):
+                input.append(
+                    {
+                        "role": "Tool Return",
+                        "name": part.tool_name,
+                        "content": part.model_response_str(),
+                    }
+                )
+    llm_span.input = input
+
+    content = ""
+    tool_calls = []
+    for part in result.parts:
+        if isinstance(part, TextPart):
+            content += part.content + "\n"
+        elif isinstance(part, ToolCallPart):
+            tool_calls.append(
+                LlmToolCall(name=part.tool_name, args=part.args_as_dict())
+            )
+    llm_span.output = LlmOutput(
+        role="Assistant", content=content, tool_calls=tool_calls
+    )
+    llm_span.tools_called = _extract_tools_called_from_llm_response(
+        result.parts
+    )
+
+
+def set_agent_span_attributes(agent_span: AgentSpan, result: AgentRunResult):
+    agent_span.tools_called = _extract_tools_called(result)
+
+
+# llm tools called
+def _extract_tools_called_from_llm_response(
+    result: List[ModelResponsePart],
+) -> List[ToolCall]:
+    tool_calls = []
+
+    # Loop through each ModelResponsePart
+    for part in result:
+        # Look for parts with part_kind="tool-call"
+        if hasattr(part, "part_kind") and part.part_kind == "tool-call":
+            # Extract tool name and args from the ToolCallPart
+            tool_name = part.tool_name
+            input_parameters = (
+                part.args_as_dict() if hasattr(part, "args_as_dict") else None
+            )
+
+            # Create and append ToolCall object
+            tool_call = ToolCall(
+                name=tool_name, input_parameters=input_parameters
+            )
+            tool_calls.append(tool_call)
+
+    return tool_calls
+
+
+# TODO: llm tools called (reposne is present next message)
+def _extract_tools_called(result: AgentRunResult) -> List[ToolCall]:
+    tool_calls = []
+
+    # Access the message history from the _state
+    message_history = result._state.message_history
+
+    # Scan through all messages in the history
+    for message in message_history:
+        # Check if this is a ModelResponse (kind="response")
+        if hasattr(message, "kind") and message.kind == "response":
+            # For ModelResponse messages, check each part
+            if hasattr(message, "parts"):
+                for part in message.parts:
+                    # Look for parts with part_kind="tool-call"
+                    if (
+                        hasattr(part, "part_kind")
+                        and part.part_kind == "tool-call"
+                    ):
+                        # Extract tool name and args from the ToolCallPart
+                        tool_name = part.tool_name
+                        input_parameters = (
+                            part.args_as_dict()
+                            if hasattr(part, "args_as_dict")
+                            else None
+                        )
+
+                        # Create and append ToolCall object
+                        tool_call = ToolCall(
+                            name=tool_name, input_parameters=input_parameters
+                        )
+                        tool_calls.append(tool_call)
+
+    return tool_calls
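
Not part of the diff: a minimal usage sketch of the new runtime patcher. It assumes `instrument` is re-exported from `deepeval.integrations.pydantic_ai` (the `__init__.py` change listed above suggests this); the model name, metric, tool body, and API key are illustrative.

import asyncio

from pydantic_ai import Agent, RunContext

from deepeval.integrations.pydantic_ai import instrument
from deepeval.metrics import AnswerRelevancyMetric

# otel=False path: monkey-patches Agent.__init__, Agent.tool and Agent.run.
instrument(api_key="<your-confident-api-key>")

agent = Agent(
    "openai:gpt-4o-mini",
    llm_metrics=[AnswerRelevancyMetric()],  # forwarded to the patched LLM span
)

@agent.tool(metric_collection="tools")  # "Case 2" decoration with arguments
async def get_weather(ctx: RunContext[None], city: str) -> str:
    return f"Sunny in {city}"

async def main():
    # The extra trace_* kwargs are consumed by the patched Agent.run wrapper.
    result = await agent.run(
        "What's the weather in Zurich?",
        trace_name="weather-demo",
        trace_thread_id="thread-1",
    )
    print(result.output)

asyncio.run(main())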
deepeval/key_handler.py
CHANGED
@@ -80,6 +80,7 @@ class ModelKeyValues(Enum):
     OPENAI_MODEL_NAME = "OPENAI_MODEL_NAME"
     OPENAI_COST_PER_INPUT_TOKEN = "OPENAI_COST_PER_INPUT_TOKEN"
     OPENAI_COST_PER_OUTPUT_TOKEN = "OPENAI_COST_PER_OUTPUT_TOKEN"
+    OPENAI_API_KEY = "OPENAI_API_KEY"
     # Moonshot
     USE_MOONSHOT_MODEL = "USE_MOONSHOT_MODEL"
     MOONSHOT_MODEL_NAME = "MOONSHOT_MODEL_NAME"
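
Illustrative only (not from the diff): the new enum member's value matches the conventional environment variable name, so the same string can be used as a lookup key.

import os

from deepeval.key_handler import ModelKeyValues

# ModelKeyValues.OPENAI_API_KEY.value == "OPENAI_API_KEY"
api_key = os.getenv(ModelKeyValues.OPENAI_API_KEY.value)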
deepeval/metrics/answer_relevancy/template.py
CHANGED
@@ -37,7 +37,7 @@ JSON:
 Please generate a list of JSON with two keys: `verdict` and `reason`.
 The 'verdict' key should STRICTLY be either a 'yes', 'idk' or 'no'. Answer 'yes' if the statement is relevant to addressing the original input, 'no' if the statement is irrelevant, and 'idk' if it is ambiguous (eg., not directly relevant but could be used as a supporting point to address the input).
 The 'reason' is the reason for the verdict.
-Provide a 'reason' ONLY if the answer is 'no'.
+Provide a 'reason' ONLY if the answer is 'no' or 'idk'.
 The provided statements are statements made in the actual output.
 
 **
@@ -53,7 +53,8 @@ Example statements:
 "Security features include fingerprint authentication and an encrypted SSD.",
 "Every purchase comes with a one-year warranty.",
 "24/7 customer support is included.",
-"Pineapples taste great on pizza."
+"Pineapples taste great on pizza.",
+"The laptop is a Dell XPS 13."
 ]
 
 Example JSON:
@@ -79,6 +80,10 @@ Example JSON:
 {{
 "verdict": "no",
 "reason": "The statement about pineapples on pizza is completely irrelevant to the input, which asks about laptop features."
+}},
+{{
+"verdict": "idk",
+"reason": "The statement about the laptop being a Dell XPS 13 is not directly relevant to the input, but could be used as a supporting point to address the input."
 }}
 ]
 }}
deepeval/metrics/faithfulness/template.py
CHANGED
@@ -4,7 +4,7 @@ from typing import Optional, List
 class FaithfulnessTemplate:
     @staticmethod
     def generate_claims(actual_output: str):
-        return f"""Based on the given text, please extract a comprehensive list of FACTUAL, undisputed truths, that can inferred from the provided
+        return f"""Based on the given text, please extract a comprehensive list of FACTUAL, undisputed truths, that can inferred from the provided actual AI output.
 These truths, MUST BE COHERENT, and CANNOT be taken out of context.
 
 Example:
@@ -24,9 +24,10 @@ Example JSON:
 IMPORTANT: Please make sure to only return in JSON format, with the "claims" key as a list of strings. No words or explanation is needed.
 Only include claims that are factual, BUT IT DOESN'T MATTER IF THEY ARE FACTUALLY CORRECT. The claims you extract should include the full context it was presented in, NOT cherry picked facts.
 You should NOT include any prior knowledge, and take the text at face value when extracting claims.
+You should be aware that it is an AI that is outputting these claims.
 **
 
-
+AI Output:
 {actual_output}
 
 JSON:
@@ -72,7 +73,7 @@ JSON:
     def generate_verdicts(claims: List[str], retrieval_context: str):
         return f"""Based on the given claims, which is a list of strings, generate a list of JSON objects to indicate whether EACH claim contradicts any facts in the retrieval context. The JSON will have 2 fields: 'verdict' and 'reason'.
 The 'verdict' key should STRICTLY be either 'yes', 'no', or 'idk', which states whether the given claim agrees with the context.
-Provide a 'reason' ONLY if the answer is 'no'.
+Provide a 'reason' ONLY if the answer is 'no' or 'idk'.
 The provided claim is drawn from the actual output. Try to provide a correction in the reason using the facts in the retrieval context.
 
 **
@@ -84,28 +85,30 @@ Example:
 {{
 "verdicts": [
 {{
-"verdict": "idk"
+"verdict": "idk",
+"reason": "The claim about Barack Obama is although incorrect, it is not directly addressed in the retrieval context, and so poses no contradiction."
 }},
 {{
-"verdict": "idk"
+"verdict": "idk",
+"reason": "The claim about Zurich being a city in London is incorrect but does not pose a contradiction to the retrieval context."
 }},
 {{
 "verdict": "yes"
 }},
 {{
 "verdict": "no",
-"reason": "The actual output claims Einstein won the Nobel Prize in 1969, which is untrue as the retrieval context states it is 1968 instead."
+"reason": "The actual output claims Einstein won the Nobel Prize in 1969, which is untrue as the retrieval context states it is 1968 instead. This contradicts the retrieval context."
 }},
 {{
 "verdict": "no",
-"reason": "The actual output claims Einstein is a German chef, which is not correct as the retrieval context states he was a German scientist instead."
+"reason": "The actual output claims Einstein is a German chef, which is not correct as the retrieval context states he was a German scientist instead. This contradicts the retrieval context."
 }},
 ]
 }}
 ===== END OF EXAMPLE ======
 
 The length of 'verdicts' SHOULD BE STRICTLY EQUAL to that of claims.
-You DON'T have to provide a reason if the answer is 'yes'
+You DON'T have to provide a reason if the answer is 'yes'.
 ONLY provide a 'no' answer if the retrieval context DIRECTLY CONTRADICTS the claims. YOU SHOULD NEVER USE YOUR PRIOR KNOWLEDGE IN YOUR JUDGEMENT.
 Claims made using vague, suggestive, speculative language such as 'may have', 'possibility due to', does NOT count as a contradiction.
 Claims that are not backed up by the retrieval context or are not mentioned in it MUST be answered 'idk'.
deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py
CHANGED
@@ -39,7 +39,7 @@ class MultimodalAnswerRelevancyTemplate:
 Please generate a list of JSON with two keys: `verdict` and `reason`.
 The 'verdict' key should STRICTLY be either a 'yes', 'idk' or 'no'. Answer 'yes' if the statement or image is relevant to addressing the original input, 'no' if the statement or image is irrelevant, and 'idk' if it is ambiguous (eg., not directly relevant but could be used as a supporting point to address the input).
 The 'reason' is the reason for the verdict.
-Provide a 'reason' ONLY if the answer is 'no'.
+Provide a 'reason' ONLY if the answer is 'no' or 'idk'.
 The provided statements are statements and images generated in the actual output.
 
 **
@@ -54,13 +54,15 @@ class MultimodalAnswerRelevancyTemplate:
 "reason": "The 'Shoes.' statement made in the actual output is completely irrelevant to the input, which asks about what to do in the event of an earthquake."
 }},
 {{
-"verdict": "idk"
+"verdict": "idk",
+"reason": "The statement thanking the user for asking the question is not directly relevant to the input, but is not entirely irrelevant."
 }},
 {{
-"verdict": "idk"
+"verdict": "idk",
+"reason": "The question about whether there is anything else the user can help with is not directly relevant to the input, but is not entirely irrelevant."
 }},
 {{
-"verdict": "yes"
+"verdict": "yes",
 }}
 ]
 }}
deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py
CHANGED
@@ -95,7 +95,7 @@ class MultimodalFaithfulnessTemplate:
         return textwrap.dedent(
             f"""Based on the given claims, which is a list of strings, generate a list of JSON objects to indicate whether EACH claim contradicts any facts in the retrieval context. The JSON will have 2 fields: 'verdict' and 'reason'.
 The 'verdict' key should STRICTLY be either 'yes', 'no', or 'idk', which states whether the given claim agrees with the context.
-Provide a 'reason' ONLY if the answer is 'no'.
+Provide a 'reason' ONLY if the answer is 'no' or 'idk'.
 The provided claim is drawn from the actual output. Try to provide a correction in the reason using the facts in the retrieval context.
 
 **
@@ -107,10 +107,12 @@ class MultimodalFaithfulnessTemplate:
 {{
 "verdicts": [
 {{
-"verdict": "idk"
+"verdict": "idk",
+"reason": "The claim about Barack Obama is not directly addressed in the retrieval context, and so poses no contradiction."
 }},
 {{
-"verdict": "idk"
+"verdict": "idk",
+"reason": "The claim about Zurich being a city in London is incorrect but does not pose a contradiction to the retrieval context."
 }},
 {{
 "verdict": "yes"
@@ -128,7 +130,7 @@ class MultimodalFaithfulnessTemplate:
 ===== END OF EXAMPLE ======
 
 The length of 'verdicts' SHOULD BE STRICTLY EQUAL to that of claims.
-You DON'T have to provide a reason if the answer is 'yes'
+You DON'T have to provide a reason if the answer is 'yes'.
 ONLY provide a 'no' answer if the retrieval context DIRECTLY CONTRADICTS the claims. YOU SHOULD NEVER USE YOUR PRIOR KNOWLEDGE IN YOUR JUDGEMENT.
 Claims made using vague, suggestive, speculative language such as 'may have', 'possibility due to', does NOT count as a contradiction.
 Claims that is not backed up due to a lack of information/is not mentioned in the retrieval contexts MUST be answered 'idk', otherwise I WILL DIE.
deepeval/metrics/tool_correctness/tool_correctness.py
CHANGED
@@ -223,9 +223,13 @@ class ToolCorrectnessMetric(BaseMetric):
             total_score += best_score
             matched_called_tools.add(best_called_tool)
         return (
-
-            if self.expected_tools
-            else
+            1.0
+            if not self.expected_tools and not self.tools_called
+            else (
+                0.0
+                if not self.expected_tools
+                else total_score / len(self.expected_tools)
+            )
         )
 
         # Consider ordering score
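
Not part of the diff: a standalone sketch of the scoring behavior the new return expression introduces, with illustrative tool names and match score.

def tool_correctness_score(expected_tools, tools_called, total_score):
    # Mirrors the new expression: perfect score when nothing was expected and
    # nothing was called, zero when tools were called despite none being
    # expected, otherwise the accumulated match score averaged over the
    # expected tools.
    return (
        1.0
        if not expected_tools and not tools_called
        else (
            0.0
            if not expected_tools
            else total_score / len(expected_tools)
        )
    )

print(tool_correctness_score([], [], 0.0))                                 # 1.0
print(tool_correctness_score([], ["web_search"], 0.0))                     # 0.0
print(tool_correctness_score(["web_search", "sum"], ["web_search"], 1.0))  # 0.5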
deepeval/models/llms/amazon_bedrock_model.py
CHANGED
@@ -115,13 +115,34 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
     ###############################################
 
     def get_converse_request_body(self, prompt: str) -> dict:
+        # Inline parameter translation with defaults
+        param_mapping = {
+            "max_tokens": "maxTokens",
+            "top_p": "topP",
+            "top_k": "topK",
+            "stop_sequences": "stopSequences",
+        }
+
+        # Start with defaults for required parameters
+        translated_kwargs = {
+            "maxTokens": self.generation_kwargs.get("max_tokens", 1000),
+            "topP": self.generation_kwargs.get("top_p", 0),
+        }
+
+        # Add any other parameters from generation_kwargs
+        for key, value in self.generation_kwargs.items():
+            if key not in [
+                "max_tokens",
+                "top_p",
+            ]:  # Skip already handled defaults
+                aws_key = param_mapping.get(key, key)
+                translated_kwargs[aws_key] = value
+
         return {
             "messages": [{"role": "user", "content": [{"text": prompt}]}],
             "inferenceConfig": {
                 "temperature": self.temperature,
-
-                "maxTokens": self.generation_kwargs.get("max_tokens", 1000),
-                **self.generation_kwargs,
+                **translated_kwargs,
             },
         }
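
Not part of the diff: a standalone sketch of the parameter translation the new request body performs, with illustrative generation kwargs.

def translate_generation_kwargs(generation_kwargs: dict) -> dict:
    # snake_case kwargs are mapped to camelCase inferenceConfig keys instead of
    # being spread into the request body unchanged, as they were before 3.5.0.
    param_mapping = {
        "max_tokens": "maxTokens",
        "top_p": "topP",
        "top_k": "topK",
        "stop_sequences": "stopSequences",
    }
    translated = {
        "maxTokens": generation_kwargs.get("max_tokens", 1000),
        "topP": generation_kwargs.get("top_p", 0),
    }
    for key, value in generation_kwargs.items():
        if key not in ("max_tokens", "top_p"):
            translated[param_mapping.get(key, key)] = value
    return translated

print(translate_generation_kwargs({"max_tokens": 512, "top_k": 40}))
# {'maxTokens': 512, 'topP': 0, 'topK': 40}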
|