deepeval 3.6.2__py3-none-any.whl → 3.6.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/confident/api.py +1 -0
- deepeval/metrics/g_eval/g_eval.py +3 -2
- deepeval/metrics/tool_correctness/tool_correctness.py +12 -7
- deepeval/models/llms/amazon_bedrock_model.py +3 -31
- deepeval/models/llms/openai_model.py +0 -1
- deepeval/models/llms/utils.py +22 -0
- deepeval/prompt/api.py +2 -0
- deepeval/prompt/prompt.py +355 -148
- deepeval/tracing/otel/utils.py +52 -35
- {deepeval-3.6.2.dist-info → deepeval-3.6.4.dist-info}/METADATA +1 -1
- {deepeval-3.6.2.dist-info → deepeval-3.6.4.dist-info}/RECORD +15 -15
- {deepeval-3.6.2.dist-info → deepeval-3.6.4.dist-info}/LICENSE.md +0 -0
- {deepeval-3.6.2.dist-info → deepeval-3.6.4.dist-info}/WHEEL +0 -0
- {deepeval-3.6.2.dist-info → deepeval-3.6.4.dist-info}/entry_points.txt +0 -0
deepeval/_version.py
CHANGED

@@ -1 +1 @@
-__version__: str = "3.6.2"
+__version__: str = "3.6.4"

deepeval/confident/api.py
CHANGED

@@ -90,6 +90,7 @@ class Endpoints(Enum):
     TRACES_ENDPOINT = "/v1/traces"
     ANNOTATIONS_ENDPOINT = "/v1/annotations"
     PROMPTS_VERSION_ID_ENDPOINT = "/v1/prompts/:alias/versions/:versionId"
+    PROMPTS_LABEL_ENDPOINT = "/v1/prompts/:alias/labels/:label"
     PROMPTS_ENDPOINT = "/v1/prompts"
     PROMPTS_VERSIONS_ENDPOINT = "/v1/prompts/:alias/versions"
     SIMULATE_ENDPOINT = "/v1/simulate"

deepeval/metrics/g_eval/g_eval.py
CHANGED

@@ -97,7 +97,8 @@ class GEval(BaseMetric):
             test_case, _additional_context=_additional_context
         )
         self.score = (
-            float(g_score)
+            (float(g_score) - self.score_range[0])
+            / self.score_range_span
            if not self.strict_mode
            else int(g_score)
        )
@@ -140,7 +141,7 @@ class GEval(BaseMetric):
             test_case, _additional_context=_additional_context
         )
         self.score = (
-            float(g_score) / self.score_range_span
+            (float(g_score) - self.score_range[0]) / self.score_range_span
            if not self.strict_mode
            else int(g_score)
        )

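Note: both the synchronous and asynchronous paths now apply the same min-max normalization, shifting the raw judge score by the lower bound of `score_range` before dividing by `score_range_span`, so scores within the configured range land in [0, 1]. A minimal sketch of the arithmetic (the helper name is illustrative, not deepeval code), assuming `score_range = (lower, upper)` and `score_range_span = upper - lower`:

    def normalize_g_score(g_score: float, score_range: tuple) -> float:
        lower, upper = score_range
        # mirrors (float(g_score) - self.score_range[0]) / self.score_range_span above
        return (float(g_score) - lower) / (upper - lower)

    # A judge score of 7 on a 1-10 rubric now normalizes to (7 - 1) / 9,
    # where dividing by the span alone would have given 7 / 9.
    assert abs(normalize_g_score(7, (1, 10)) - 6 / 9) < 1e-9
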
deepeval/metrics/tool_correctness/tool_correctness.py
CHANGED

@@ -1,4 +1,4 @@
-from typing import List,
+from typing import List, Dict

 from deepeval.metrics.indicator import metric_progress_indicator
 from deepeval.metrics.utils import (
@@ -152,14 +152,19 @@ class ToolCorrectnessMetric(BaseMetric):

     # Calculate score
     def _calculate_score(self):
-
-        if len(self.expected_tools) == 0:
-            score = 1.0 if len(self.tools_called) == 0 else 0.0
-        elif self.should_exact_match:
+        if self.should_exact_match:
             score = self._calculate_exact_match_score()
         elif self.should_consider_ordering:
             _, weighted_length = self._compute_weighted_lcs()
-            score = weighted_length / len(self.expected_tools)
+            if (
+                len(self.tools_called) == len(self.expected_tools)
+                and len(self.expected_tools) == 0
+            ):
+                score = 1.0
+            elif len(self.expected_tools) == 0:
+                score = 0.0
+            else:
+                score = weighted_length / len(self.expected_tools)
         else:
             score = self._calculate_non_exact_match_score()
         return 0 if self.strict_mode and score < self.threshold else score
@@ -294,7 +299,7 @@ class ToolCorrectnessMetric(BaseMetric):
     def is_successful(self) -> bool:
         try:
             self.success = self.score >= self.threshold
-        except:
+        except (AttributeError, TypeError):
             self.success = False
         return self.success

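Note: the empty-`expected_tools` short-circuit moved inside the ordering branch. Under `should_consider_ordering`, an empty expected list now scores 1.0 only when no tools were called and 0.0 otherwise; the general case still divides the weighted LCS length by the number of expected tools. A hypothetical distillation of that branch (not deepeval code):

    def ordering_score(tools_called: list, expected_tools: list, weighted_length: float) -> float:
        # Mirrors the new elif branch of _calculate_score above.
        if len(tools_called) == len(expected_tools) == 0:
            return 1.0  # nothing expected, nothing called
        if len(expected_tools) == 0:
            return 0.0  # nothing expected, yet tools were called
        return weighted_length / len(expected_tools)
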
deepeval/models/llms/amazon_bedrock_model.py
CHANGED

@@ -9,7 +9,7 @@ from deepeval.models.retry_policy import (
     sdk_retries_for,
 )
 from deepeval.models import DeepEvalBaseLLM
-from deepeval.models.llms.utils import trim_and_load_json
+from deepeval.models.llms.utils import trim_and_load_json, safe_asyncio_run
 from deepeval.constants import ProviderSlug as PS

 # check aiobotocore availability
@@ -40,7 +40,6 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
         region_name: str,
         aws_access_key_id: Optional[str] = None,
         aws_secret_access_key: Optional[str] = None,
-        temperature: float = 0,
         input_token_cost: float = 0,
         output_token_cost: float = 0,
         generation_kwargs: Optional[Dict] = None,
@@ -53,13 +52,9 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
         self.region_name = region_name
         self.aws_access_key_id = aws_access_key_id
         self.aws_secret_access_key = aws_secret_access_key
-        self.temperature = temperature
         self.input_token_cost = input_token_cost
         self.output_token_cost = output_token_cost

-        if self.temperature < 0:
-            raise ValueError("Temperature must be >= 0.")
-
         # prepare aiobotocore session, config, and async exit stack
         self._session = get_session()
         self._exit_stack = AsyncExitStack()
@@ -75,7 +70,7 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
-        return
+        return safe_asyncio_run(self.a_generate(prompt, schema))

     @retry_bedrock
     async def a_generate(
@@ -142,34 +137,11 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
     ###############################################

     def get_converse_request_body(self, prompt: str) -> dict:
-        # Inline parameter translation with defaults
-        param_mapping = {
-            "max_tokens": "maxTokens",
-            "top_p": "topP",
-            "top_k": "topK",
-            "stop_sequences": "stopSequences",
-        }
-
-        # Start with defaults for required parameters
-        translated_kwargs = {
-            "maxTokens": self.generation_kwargs.get("max_tokens", 1000),
-            "topP": self.generation_kwargs.get("top_p", 0),
-        }
-
-        # Add any other parameters from generation_kwargs
-        for key, value in self.generation_kwargs.items():
-            if key not in [
-                "max_tokens",
-                "top_p",
-            ]:  # Skip already handled defaults
-                aws_key = param_mapping.get(key, key)
-                translated_kwargs[aws_key] = value

         return {
             "messages": [{"role": "user", "content": [{"text": prompt}]}],
             "inferenceConfig": {
-
-                **translated_kwargs,
+                **self.generation_kwargs,
             },
         }
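Note: two behavioral consequences fall out of this file's changes. First, the constructor no longer accepts or validates `temperature`. Second, with the snake_case-to-camelCase translation and its `maxTokens`/`topP` defaults removed, `generation_kwargs` is spread verbatim into the Converse API's `inferenceConfig`, so callers should pass Bedrock's native camelCase keys. A hedged construction sketch (the `model_id` parameter name and all values are assumptions, not confirmed by this diff):

    from deepeval.models.llms.amazon_bedrock_model import AmazonBedrockModel

    model = AmazonBedrockModel(
        model_id="anthropic.claude-3-5-sonnet-20240620-v1:0",  # assumed parameter name and model id
        region_name="us-east-1",
        generation_kwargs={"maxTokens": 1000, "topP": 0.9},  # camelCase keys, passed through as-is
    )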
|
deepeval/models/llms/utils.py
CHANGED

@@ -1,6 +1,7 @@
 from typing import Dict
 import re
 import json
+import asyncio


 def trim_and_load_json(
@@ -20,3 +21,24 @@ def trim_and_load_json(
         raise ValueError(error_str)
     except Exception as e:
         raise Exception(f"An unexpected error occurred: {str(e)}")
+
+
+def safe_asyncio_run(coro):
+    """
+    Run an async coroutine safely.
+    Falls back to run_until_complete if already in a running event loop.
+    """
+    try:
+        return asyncio.run(coro)
+    except RuntimeError:
+        try:
+            loop = asyncio.get_event_loop()
+            if loop.is_running():
+                future = asyncio.ensure_future(coro)
+                return loop.run_until_complete(future)
+            else:
+                return loop.run_until_complete(coro)
+        except Exception as inner_e:
+            raise
+    except Exception as e:
+        raise

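Note: `safe_asyncio_run` is what the Bedrock model's synchronous `generate` now delegates to: it tries `asyncio.run` first and falls back to the current loop only on `RuntimeError`. A minimal usage sketch from plain synchronous code, where no loop is running and the `asyncio.run` path is taken:

    import asyncio
    from deepeval.models.llms.utils import safe_asyncio_run

    async def add(a: int, b: int) -> int:
        await asyncio.sleep(0)
        return a + b

    print(safe_asyncio_run(add(1, 2)))  # 3

One caveat: in standard asyncio, calling `loop.run_until_complete` on a loop that is already running itself raises `RuntimeError`, so the running-loop fallback above is best-effort rather than a general re-entrancy mechanism.
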
deepeval/prompt/api.py
CHANGED

@@ -45,6 +45,8 @@ class PromptVersionsHttpResponse(BaseModel):

 class PromptHttpResponse(BaseModel):
     id: str
+    version: str
+    label: Optional[str] = None
     text: Optional[str] = None
     messages: Optional[List[PromptMessage]] = None
     interpolation_type: PromptInterpolationType = Field(

deepeval/prompt/prompt.py
CHANGED

@@ -1,5 +1,5 @@
 from enum import Enum
-from typing import Optional, List, Dict
+from typing import Literal, Optional, List, Dict
 from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn
 from rich.console import Console
 import time
@@ -7,6 +7,8 @@ import json
 import os
 from pydantic import BaseModel
 import asyncio
+import portalocker
+import threading

 from deepeval.prompt.api import (
     PromptHttpResponse,
@@ -19,12 +21,38 @@ from deepeval.prompt.api import (
 from deepeval.prompt.utils import interpolate_text
 from deepeval.confident.api import Api, Endpoints, HttpMethods
 from deepeval.constants import HIDDEN_DIR
-from deepeval.utils import (
-    get_or_create_event_loop,
-    get_or_create_general_event_loop,
-)

 CACHE_FILE_NAME = f"{HIDDEN_DIR}/.deepeval-prompt-cache.json"
+VERSION_CACHE_KEY = "version"
+LABEL_CACHE_KEY = "label"
+
+# Global background event loop for polling
+_polling_loop: Optional[asyncio.AbstractEventLoop] = None
+_polling_thread: Optional[threading.Thread] = None
+_polling_loop_lock = threading.Lock()
+
+
+def _get_or_create_polling_loop() -> asyncio.AbstractEventLoop:
+    """Get or create a background event loop for polling that runs in a daemon thread."""
+    global _polling_loop, _polling_thread
+
+    with _polling_loop_lock:
+        if _polling_loop is None or not _polling_loop.is_running():
+
+            def run_loop():
+                global _polling_loop
+                _polling_loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(_polling_loop)
+                _polling_loop.run_forever()
+
+            _polling_thread = threading.Thread(target=run_loop, daemon=True)
+            _polling_thread.start()
+
+            # Wait for loop to be ready
+            while _polling_loop is None:
+                time.sleep(0.01)
+
+    return _polling_loop


 class CustomEncoder(json.JSONEncoder):

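Note: the module replaces the removed `get_or_create_event_loop` helpers with a single process-wide event loop running in a daemon thread, so background refreshes outlive any one synchronous `pull()` call; coroutines are handed to it with `asyncio.run_coroutine_threadsafe` (see `pull` below). A generic illustration of the pattern (not deepeval code):

    import asyncio
    import threading

    loop = asyncio.new_event_loop()
    threading.Thread(target=loop.run_forever, daemon=True).start()

    async def tick() -> str:
        await asyncio.sleep(0.1)
        return "refreshed"

    # Schedule work on the background loop from the main thread:
    future = asyncio.run_coroutine_threadsafe(tick(), loop)
    print(future.result(timeout=1))  # "refreshed"
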
@@ -39,6 +67,7 @@ class CustomEncoder(json.JSONEncoder):
 class CachedPrompt(BaseModel):
     alias: str
     version: str
+    label: Optional[str] = None
     template: Optional[str]
     messages_template: Optional[List[PromptMessage]]
     prompt_version_id: str
@@ -50,6 +79,7 @@ class CachedPrompt(BaseModel):


 class Prompt:
+    label: Optional[str] = None
     _prompt_version_id: Optional[str] = None
     _type: Optional[PromptType] = None
     _interpolation_type: Optional[PromptInterpolationType] = None
@@ -73,13 +103,24 @@ class Prompt:
         self._text_template = template
         self._messages_template = messages_template
         self._version = None
-        self._polling_tasks: Dict[str, asyncio.Task] = {}
-        self._refresh_map: Dict[str, int] = {}
+        self._polling_tasks: Dict[str, Dict[str, asyncio.Task]] = {}
+        self._refresh_map: Dict[str, Dict[str, int]] = {}
+        self._lock = (
+            threading.Lock()
+        )  # Protect instance attributes from race conditions
         if template:
             self._type = PromptType.TEXT
         elif messages_template:
             self._type = PromptType.LIST

+    def __del__(self):
+        """Cleanup polling tasks when instance is destroyed"""
+        try:
+            self._stop_polling()
+        except Exception:
+            # Suppress exceptions during cleanup to avoid issues in interpreter shutdown
+            pass
+
     @property
     def version(self):
         if self._version is not None and self._version != "latest":
@@ -95,33 +136,37 @@ class Prompt:
         self._version = value

     def interpolate(self, **kwargs):
-        if self._type == PromptType.TEXT:
-            if self._text_template is None:
+        with self._lock:
+            prompt_type = self._type
+            text_template = self._text_template
+            messages_template = self._messages_template
+            interpolation_type = self._interpolation_type
+
+        if prompt_type == PromptType.TEXT:
+            if text_template is None:
                 raise TypeError(
                     "Unable to interpolate empty prompt template. Please pull a prompt from Confident AI or set template manually to continue."
                 )

-            return interpolate_text(
-                self._interpolation_type, self._text_template, **kwargs
-            )
+            return interpolate_text(interpolation_type, text_template, **kwargs)

-        elif self._type == PromptType.LIST:
-            if self._messages_template is None:
+        elif prompt_type == PromptType.LIST:
+            if messages_template is None:
                 raise TypeError(
                     "Unable to interpolate empty prompt template messages. Please pull a prompt from Confident AI or set template manually to continue."
                 )

             interpolated_messages = []
-            for message in self._messages_template:
+            for message in messages_template:
                 interpolated_content = interpolate_text(
-                    self._interpolation_type, message.content, **kwargs
+                    interpolation_type, message.content, **kwargs
                 )
                 interpolated_messages.append(
                     {"role": message.role, "content": interpolated_content}
                 )
             return interpolated_messages
         else:
-            raise ValueError(f"Unsupported prompt type: {self._type}")
+            raise ValueError(f"Unsupported prompt type: {prompt_type}")

     def _get_versions(self) -> List:
         if self.alias is None:
@@ -138,111 +183,205 @@ class Prompt:
         return versions.text_versions or versions.messages_versions or []

     def _read_from_cache(
-        self,
+        self,
+        alias: str,
+        version: Optional[str] = None,
+        label: Optional[str] = None,
     ) -> Optional[CachedPrompt]:
         if not os.path.exists(CACHE_FILE_NAME):
-            return None
+            return None

         try:
-            with open(CACHE_FILE_NAME, "r") as f:
+            # Use shared lock for reading to allow concurrent reads
+            with portalocker.Lock(
+                CACHE_FILE_NAME,
+                mode="r",
+                flags=portalocker.LOCK_SH | portalocker.LOCK_NB,
+            ) as f:
                 cache_data = json.load(f)

             if alias in cache_data:
                 if version:
-                    if version in cache_data[alias]:
-                        return CachedPrompt(
-                            **cache_data[alias][version]
+                    if (
+                        VERSION_CACHE_KEY in cache_data[alias]
+                        and version in cache_data[alias][VERSION_CACHE_KEY]
+                    ):
+                        return CachedPrompt(
+                            **cache_data[alias][VERSION_CACHE_KEY][version]
                         )
+                elif label:
+                    if (
+                        LABEL_CACHE_KEY in cache_data[alias]
+                        and label in cache_data[alias][LABEL_CACHE_KEY]
+                    ):
+                        return CachedPrompt(
+                            **cache_data[alias][LABEL_CACHE_KEY][label]
+                        )
+            return None
+        except (portalocker.exceptions.LockException, Exception):
+            # If cache is locked, corrupted or unreadable, return None and let it fetch from API
+            return None

     def _write_to_cache(
         self,
-        version: str,
+        cache_key: Literal[VERSION_CACHE_KEY, LABEL_CACHE_KEY],
+        version: str,
+        label: Optional[str] = None,
         text_template: Optional[str] = None,
         messages_template: Optional[List[PromptMessage]] = None,
         prompt_version_id: Optional[str] = None,
         type: Optional[PromptType] = None,
         interpolation_type: Optional[PromptInterpolationType] = None,
     ):
-        if not self.alias
+        if not self.alias:
             return

-        cache_data = {}
-        if os.path.exists(CACHE_FILE_NAME):
-            try:
-                with open(CACHE_FILE_NAME, "r") as f:
-                    cache_data = json.load(f)
-            except Exception:
-                cache_data = {}
-
-        # Ensure the cache structure is initialized properly
-        if self.alias not in cache_data:
-            cache_data[self.alias] = {}
-
-        # Cache the prompt
-        cache_data[self.alias][version] = {
-            "alias": self.alias,
-            "version": version,
-            "template": text_template,
-            "messages_template": messages_template,
-            "prompt_version_id": prompt_version_id,
-            "type": type,
-            "interpolation_type": interpolation_type,
-        }
-
         # Ensure directory exists
         os.makedirs(HIDDEN_DIR, exist_ok=True)

-        with open(CACHE_FILE_NAME, "w") as f:
-            json.dump(cache_data, f, cls=CustomEncoder)
+        try:
+            # Use r+ mode if file exists, w mode if it doesn't
+            mode = "r+" if os.path.exists(CACHE_FILE_NAME) else "w"
+
+            with portalocker.Lock(
+                CACHE_FILE_NAME,
+                mode=mode,
+                flags=portalocker.LOCK_EX,
+            ) as f:
+                # Read existing cache data if file exists and has content
+                cache_data = {}
+                if mode == "r+":
+                    try:
+                        f.seek(0)
+                        content = f.read()
+                        if content:
+                            cache_data = json.loads(content)
+                    except (json.JSONDecodeError, Exception):
+                        cache_data = {}
+
+                # Ensure the cache structure is initialized properly
+                if self.alias not in cache_data:
+                    cache_data[self.alias] = {}
+
+                if cache_key not in cache_data[self.alias]:
+                    cache_data[self.alias][cache_key] = {}
+
+                # Cache the prompt
+                cached_entry = {
+                    "alias": self.alias,
+                    "version": version,
+                    "label": label,
+                    "template": text_template,
+                    "messages_template": messages_template,
+                    "prompt_version_id": prompt_version_id,
+                    "type": type,
+                    "interpolation_type": interpolation_type,
+                }
+
+                if cache_key == VERSION_CACHE_KEY:
+                    cache_data[self.alias][cache_key][version] = cached_entry
+                else:
+                    cache_data[self.alias][cache_key][label] = cached_entry
+
+                # Write back to cache file
+                f.seek(0)
+                f.truncate()
+                json.dump(cache_data, f, cls=CustomEncoder)
+        except portalocker.exceptions.LockException:
+            # If we can't acquire the lock, silently skip caching
+            pass
+        except Exception:
+            # If any other error occurs during caching, silently skip
+            pass
+
+    def _load_from_cache_with_progress(
+        self,
+        progress: Progress,
+        task_id: int,
+        start_time: float,
+        version: Optional[str] = None,
+        label: Optional[str] = None,
+    ):
+        """
+        Load prompt from cache and update progress bar.
+        Raises if unable to load from cache.
+        """
+        cached_prompt = self._read_from_cache(
+            self.alias, version=version, label=label
+        )
+        if not cached_prompt:
+            raise ValueError("Unable to fetch prompt and load from cache")
+
+        with self._lock:
+            self.version = cached_prompt.version
+            self.label = cached_prompt.label
+            self._text_template = cached_prompt.template
+            self._messages_template = cached_prompt.messages_template
+            self._prompt_version_id = cached_prompt.prompt_version_id
+            self._type = PromptType(cached_prompt.type)
+            self._interpolation_type = PromptInterpolationType(
+                cached_prompt.interpolation_type
+            )
+
+        end_time = time.perf_counter()
+        time_taken = format(end_time - start_time, ".2f")
+        progress.update(
+            task_id,
+            description=f"{progress.tasks[task_id].description}[rgb(25,227,160)]Loaded from cache! ({time_taken}s)",
+        )

     def pull(
         self,
         version: Optional[str] = None,
+        label: Optional[str] = None,
         fallback_to_cache: bool = True,
         write_to_cache: bool = True,
         default_to_cache: bool = True,
         refresh: Optional[int] = 60,
     ):
+        should_write_on_first_fetch = False
         if refresh:
+            # Check if we need to bootstrap the cache
+            cached_prompt = self._read_from_cache(
+                self.alias, version=version, label=label
+            )
+            if cached_prompt is None:
+                # No cache exists, so we should write after fetching to bootstrap
+                should_write_on_first_fetch = True
+                write_to_cache = False  # Polling will handle subsequent writes
+
         if self.alias is None:
             raise TypeError(
                 "Unable to pull prompt from Confident AI when no alias is provided."
             )

         # Manage background prompt polling
+        if refresh:
+            loop = _get_or_create_polling_loop()
+            asyncio.run_coroutine_threadsafe(
+                self.create_polling_task(version, label, refresh), loop
+            )

         if default_to_cache:
             try:
-                cached_prompt = self._read_from_cache(
+                cached_prompt = self._read_from_cache(
+                    self.alias, version=version, label=label
+                )
                 if cached_prompt:
-                    self.version = cached_prompt.version
-                    self._text_template = cached_prompt.template
-                    self._messages_template = cached_prompt.messages_template
-                    self._prompt_version_id = cached_prompt.prompt_version_id
-                    self._type = PromptType(cached_prompt.type)
-                    self._interpolation_type = PromptInterpolationType(
-                        cached_prompt.interpolation_type
-                    )
+                    with self._lock:
+                        self.version = cached_prompt.version
+                        self.label = cached_prompt.label
+                        self._text_template = cached_prompt.template
+                        self._messages_template = (
+                            cached_prompt.messages_template
+                        )
+                        self._prompt_version_id = (
+                            cached_prompt.prompt_version_id
+                        )
+                        self._type = PromptType(cached_prompt.type)
+                        self._interpolation_type = PromptInterpolationType(
+                            cached_prompt.interpolation_type
+                        )
                     return
             except:
                 pass

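Note: cache access is now file-locked via portalocker: reads take a shared, non-blocking lock so concurrent readers never queue behind one another, writes take an exclusive lock and rewrite the file in place, and both paths swallow lock failures (falling back to the API, or skipping the cache write). A generic sketch of that locking pattern (the file path is illustrative):

    import json
    import portalocker

    # Writer: exclusive lock, then rewrite the file in place.
    with portalocker.Lock("example-cache.json", mode="w", flags=portalocker.LOCK_EX) as f:
        json.dump({"alias": {}}, f)

    # Reader: shared lock; LOCK_NB raises instead of blocking if a writer holds the file.
    with portalocker.Lock(
        "example-cache.json", mode="r", flags=portalocker.LOCK_SH | portalocker.LOCK_NB
    ) as f:
        data = json.load(f)
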
@@ -254,63 +393,66 @@ class Prompt:
             TextColumn("[progress.description]{task.description}"),
             transient=False,
         ) as progress:
+            HINT_TEXT = (
+                f"version='{version or 'latest'}'"
+                if not label
+                else f"label='{label}'"
+            )
             task_id = progress.add_task(
-                f"Pulling [rgb(106,0,255)]'{self.alias}' (
+                f"Pulling [rgb(106,0,255)]'{self.alias}' ({HINT_TEXT})[/rgb(106,0,255)] from Confident AI...",
                 total=100,
             )
+
             start_time = time.perf_counter()
             try:
-                data, _ = api.send_request(
-                    method=HttpMethods.GET,
-                    endpoint=Endpoints.PROMPTS_VERSION_ID_ENDPOINT,
-                    url_params={
-                        "alias": self.alias,
-                        "versionId": version or "latest",
-                    },
-                )
+                if label:
+                    data, _ = api.send_request(
+                        method=HttpMethods.GET,
+                        endpoint=Endpoints.PROMPTS_LABEL_ENDPOINT,
+                        url_params={
+                            "alias": self.alias,
+                            "label": label,
+                        },
+                    )
+                else:
+                    data, _ = api.send_request(
+                        method=HttpMethods.GET,
+                        endpoint=Endpoints.PROMPTS_VERSION_ID_ENDPOINT,
+                        url_params={
+                            "alias": self.alias,
+                            "versionId": version or "latest",
+                        },
+                    )
+
                 response = PromptHttpResponse(
                     id=data["id"],
+                    version=data.get("version", None),
+                    label=data.get("label", None),
                     text=data.get("text", None),
                     messages=data.get("messages", None),
                     type=data["type"],
                     interpolation_type=data["interpolationType"],
                 )
-            except:
-                end_time = time.perf_counter()
-                time_taken = format(end_time - start_time, ".2f")
-                progress.update(
-                    task_id,
-                    description=f"{progress.tasks[task_id].description}[rgb(25,227,160)]Loaded from cache! ({time_taken}s)",
-                )
-                return
-            except:
-                raise
-
-            self.version = version or "latest"
-            self._text_template = response.text
-            self._messages_template = response.messages
-            self._prompt_version_id = response.id
-            self._type = response.type
-            self._interpolation_type = response.interpolation_type
+            except Exception:
+                if fallback_to_cache:
+                    self._load_from_cache_with_progress(
+                        progress,
+                        task_id,
+                        start_time,
+                        version=version,
+                        label=label,
+                    )
+                    return
+                raise
+
+            with self._lock:
+                self.version = response.version
+                self.label = response.label
+                self._text_template = response.text
+                self._messages_template = response.messages
+                self._prompt_version_id = response.id
+                self._type = response.type
+                self._interpolation_type = response.interpolation_type

             end_time = time.perf_counter()
             time_taken = format(end_time - start_time, ".2f")
@@ -318,9 +460,12 @@ class Prompt:
                 task_id,
                 description=f"{progress.tasks[task_id].description}[rgb(25,227,160)]Done! ({time_taken}s)",
             )
-            if write_to_cache:
+            # Write to cache if explicitly requested OR if we need to bootstrap cache for refresh mode
+            if write_to_cache or should_write_on_first_fetch:
                 self._write_to_cache(
-                    version=version or "latest",
+                    cache_key=LABEL_CACHE_KEY if label else VERSION_CACHE_KEY,
+                    version=response.version,
+                    label=response.label,
                     text_template=response.text,
                     messages_template=response.messages,
                     prompt_version_id=response.id,

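Note: `pull` can now resolve a prompt either by version (the existing `/versions/:versionId` route) or by the new label route, and the server-reported `version` and `label` are what get stored on the instance and in the cache. A hedged usage sketch (the alias and label values are illustrative, and the top-level import path is assumed from the package layout):

    from deepeval.prompt import Prompt

    prompt = Prompt(alias="my-prompt")
    prompt.pull(label="production", refresh=60)  # resolves via /v1/prompts/:alias/labels/:label
    print(prompt.version, prompt.label)
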
@@ -380,55 +525,117 @@ class Prompt:
     async def create_polling_task(
         self,
         version: Optional[str],
+        label: Optional[str],
         refresh: Optional[int] = 60,
     ):
-        if version is None:
-            return
-
         # If polling task doesn't exist, start it
-        polling_task: Optional[asyncio.Task] = self._polling_tasks.get(version)
+        CACHE_KEY = LABEL_CACHE_KEY if label else VERSION_CACHE_KEY
+        cache_value = label if label else version
+
+        # Initialize nested dicts if they don't exist
+        if CACHE_KEY not in self._polling_tasks:
+            self._polling_tasks[CACHE_KEY] = {}
+        if CACHE_KEY not in self._refresh_map:
+            self._refresh_map[CACHE_KEY] = {}
+
+        polling_task: Optional[asyncio.Task] = self._polling_tasks[
+            CACHE_KEY
+        ].get(cache_value)

         if refresh:
-            self._refresh_map[version] = refresh
+            self._refresh_map[CACHE_KEY][cache_value] = refresh
             if not polling_task:
-                self._polling_tasks[version] = asyncio.create_task(
-                    self.poll(version)
+                self._polling_tasks[CACHE_KEY][cache_value] = (
+                    asyncio.create_task(self.poll(version, label))
                 )

         # If invalid `refresh`, stop the task
         else:
             if polling_task:
                 polling_task.cancel()
-                self._polling_tasks.pop(version)
-
+                if cache_value in self._polling_tasks[CACHE_KEY]:
+                    self._polling_tasks[CACHE_KEY].pop(cache_value)
+                if cache_value in self._refresh_map[CACHE_KEY]:
+                    self._refresh_map[CACHE_KEY].pop(cache_value)
+
+    async def poll(
+        self,
+        version: Optional[str] = None,
+        label: Optional[str] = None,
+    ):
+        CACHE_KEY = LABEL_CACHE_KEY if label else VERSION_CACHE_KEY
+        cache_value = label if label else version

-    async def poll(self, version: Optional[str] = None):
-        api = Api()
         while True:
+            await asyncio.sleep(self._refresh_map[CACHE_KEY][cache_value])
+
+            api = Api()
             try:
-                data, _ = api.send_request(
-                    method=HttpMethods.GET,
-                    endpoint=Endpoints.PROMPTS_VERSION_ID_ENDPOINT,
-                    url_params={
-                        "alias": self.alias,
-                        "versionId": version or "latest",
-                    },
-                )
+                if label:
+                    data, _ = api.send_request(
+                        method=HttpMethods.GET,
+                        endpoint=Endpoints.PROMPTS_LABEL_ENDPOINT,
+                        url_params={
+                            "alias": self.alias,
+                            "label": label,
+                        },
+                    )
+                else:
+                    data, _ = api.send_request(
+                        method=HttpMethods.GET,
+                        endpoint=Endpoints.PROMPTS_VERSION_ID_ENDPOINT,
+                        url_params={
+                            "alias": self.alias,
+                            "versionId": version or "latest",
+                        },
+                    )
+
                 response = PromptHttpResponse(
                     id=data["id"],
+                    version=data.get("version", None),
+                    label=data.get("label", None),
                     text=data.get("text", None),
                     messages=data.get("messages", None),
                     type=data["type"],
                     interpolation_type=data["interpolationType"],
                 )
+
+                # Update the cache with fresh data from server
                 self._write_to_cache(
-                    version=version,
+                    cache_key=CACHE_KEY,
+                    version=response.version,
+                    label=response.label,
                     text_template=response.text,
                     messages_template=response.messages,
                     prompt_version_id=response.id,
                     type=response.type,
                     interpolation_type=response.interpolation_type,
                 )
-
+
+                # Update in-memory properties with fresh data (thread-safe)
+                with self._lock:
+                    self.version = response.version
+                    self.label = response.label
+                    self._text_template = response.text
+                    self._messages_template = response.messages
+                    self._prompt_version_id = response.id
+                    self._type = response.type
+                    self._interpolation_type = response.interpolation_type
+
             except Exception:
                 pass

+    def _stop_polling(self):
+        loop = _polling_loop
+        if not loop or not loop.is_running():
+            return
+
+        # Stop all polling tasks
+        for ck in list(self._polling_tasks.keys()):
+            for cv in list(self._polling_tasks[ck].keys()):
+                task = self._polling_tasks[ck][cv]
+                if task and not task.done():
+                    loop.call_soon_threadsafe(task.cancel)
+            self._polling_tasks[ck].clear()
+            self._refresh_map[ck].clear()
+        return

deepeval/tracing/otel/utils.py
CHANGED

@@ -383,53 +383,70 @@ def post_test_run(traces: List[Trace], test_run_id: Optional[str]):
     # return test_run_manager.post_test_run(test_run) TODO: add after test run with metric collection is implemented


+def _normalize_pydantic_ai_messages(span: ReadableSpan) -> Optional[list]:
+    try:
+        raw = span.attributes.get("pydantic_ai.all_messages")
+        if not raw:
+            return None
+
+        messages = raw
+        if isinstance(messages, str):
+            messages = json.loads(messages)
+        elif isinstance(messages, tuple):
+            messages = list(messages)
+
+        if isinstance(messages, list):
+            normalized = []
+            for m in messages:
+                if isinstance(m, str):
+                    try:
+                        m = json.loads(m)
+                    except Exception:
+                        pass
+                normalized.append(m)
+            return normalized
+    except Exception:
+        pass
+
+    return None
+
+
 def check_pydantic_ai_agent_input_output(
     span: ReadableSpan,
 ) -> Tuple[Optional[Any], Optional[Any]]:
     input_val: Optional[Any] = None
     output_val: Optional[Any] = None

+    # Get normalized messages once
+    normalized = _normalize_pydantic_ai_messages(span)
+
     # Input (pydantic_ai.all_messages) - slice up to and including the first 'user' message
-    try:
-        first_user_idx = None
-        for i, m in enumerate(normalized):
-            role = None
-            if isinstance(m, dict):
-                role = m.get("role") or m.get("author")
-            if role == "user":
-                first_user_idx = i
-                break
-
-        input_val = (
-            normalized
-            if first_user_idx is None
-            else normalized[: first_user_idx + 1]
-        )
-    except Exception:
-        pass
+    if normalized:
+        try:
+            first_user_idx = None
+            for i, m in enumerate(normalized):
+                role = None
+                if isinstance(m, dict):
+                    role = m.get("role") or m.get("author")
+                if role == "user":
+                    first_user_idx = i
+                    break
+
+            input_val = (
+                normalized
+                if first_user_idx is None
+                else normalized[: first_user_idx + 1]
+            )
+        except Exception:
+            pass

     # Output (agent final_result)
     try:
         if span.attributes.get("confident.span.type") == "agent":
             output_val = span.attributes.get("final_result")
+            if not output_val and normalized:
+                # Extract the last message if no final_result is available
+                output_val = normalized[-1]
     except Exception:
         pass

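Note: the factored-out helper accepts the `pydantic_ai.all_messages` attribute as a JSON string, a tuple, or a list whose items may themselves be JSON strings, and coerces each shape toward a list of dicts. An illustration of the input shapes it tolerates (values are made up):

    import json

    # A single JSON-encoded conversation...
    as_string = json.dumps([{"role": "user", "content": "hi"}])
    # ...or a tuple of per-message JSON strings, as OTel attribute sequences
    # are often surfaced:
    as_tuple = (json.dumps({"role": "user", "content": "hi"}),)
    # Either shape normalizes to: [{"role": "user", "content": "hi"}]
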
{deepeval-3.6.2.dist-info → deepeval-3.6.4.dist-info}/RECORD
CHANGED

@@ -1,5 +1,5 @@
 deepeval/__init__.py,sha256=6fsb813LD_jNhqR-xZnSdE5E-KsBbC3tc4oIg5ZMgTw,2115
-deepeval/_version.py,sha256=
+deepeval/_version.py,sha256=7aJWTxY4XnqpfnHnpzOHDXIjM0FFMGZTYkyt2xqUalQ,27
 deepeval/annotation/__init__.py,sha256=ZFhUVNNuH_YgQSZJ-m5E9iUb9TkAkEV33a6ouMDZ8EI,111
 deepeval/annotation/annotation.py,sha256=3j3-syeJepAcEj3u3e4T_BeRDzNr7yXGDIoNQGMKpwQ,2298
 deepeval/annotation/api.py,sha256=EYN33ACVzVxsFleRYm60KB4Exvff3rPJKt1VBuuX970,2147
@@ -138,7 +138,7 @@ deepeval/cli/test.py,sha256=kSIFMRTAfVzBJ4OitwvT829-ylV7UzPMP57P2DePS-Q,5482
 deepeval/cli/types.py,sha256=_7KdthstHNc-JKCWrfpDQCf_j8h9PMxh0qJCHmVXJr0,310
 deepeval/cli/utils.py,sha256=F4-yuONzk4ojDoSLjI9RYERB7HOD412iZ2lNlSCq4wk,5601
 deepeval/confident/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-deepeval/confident/api.py,sha256=
+deepeval/confident/api.py,sha256=2ZhrQOtfxcnQSyY6OxrjY17y1yn-NB7pfIiJa20B1Pk,8519
 deepeval/confident/types.py,sha256=-slFhDof_1maMgpLxqDRZv6kz6ZVY2hP_0uj_aveJKU,533
 deepeval/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deepeval/config/settings.py,sha256=gRRi6nXEUKse13xAShU9MA18zo14vpIgl_R0xJ_0vnM,21314
@@ -240,7 +240,7 @@ deepeval/metrics/faithfulness/faithfulness.py,sha256=bYVhHI7Tr7xH0x-7F2LijxRuCCE
 deepeval/metrics/faithfulness/schema.py,sha256=2dU9dwwmqpGJcWvY2webERWIfH_tn02xgLghHkAY_eM,437
 deepeval/metrics/faithfulness/template.py,sha256=RuZ0LFm4BjZ8lhVrKPgU3ecHszwkF0fe5-BxAkaP5AA,5839
 deepeval/metrics/g_eval/__init__.py,sha256=HAhsQFVq9LIpZXPN00Jc_WrMXrh47NIT86VnUpWM4_4,102
-deepeval/metrics/g_eval/g_eval.py,sha256=
+deepeval/metrics/g_eval/g_eval.py,sha256=CaW7VHPW-SyXt18IE1rSatgagY238s3It-j6SLRI4H4,14395
 deepeval/metrics/g_eval/schema.py,sha256=V629txuDrr_2IEKEsgJVYYZb_pkdfcltQV9ZjvxK5co,287
 deepeval/metrics/g_eval/template.py,sha256=mHj4-mr_HQwbCjpHg7lM_6UesoSatL3g8UGGQAOdT0U,4509
 deepeval/metrics/g_eval/utils.py,sha256=uUT86jRXVYvLDzcnZvvfWssDyGoBHb66nWcJSg4i1u4,8784
@@ -348,7 +348,7 @@ deepeval/metrics/task_completion/schema.py,sha256=JfnZkbCh7skWvrESy65GEo6Rvo0FDJ
 deepeval/metrics/task_completion/task_completion.py,sha256=RKFkXCVOhO70I8A16zv5BCaV3QVKldNxawJ0T93U_Zc,8978
 deepeval/metrics/task_completion/template.py,sha256=4xjTBcGrPQxInbf8iwJOZyok9SQex1aCkbxKmfkXoA4,10437
 deepeval/metrics/tool_correctness/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-deepeval/metrics/tool_correctness/tool_correctness.py,sha256=
+deepeval/metrics/tool_correctness/tool_correctness.py,sha256=j5wB9mJp7BLbn3bTZd6LlIeub1kXxXGaDVWrzyvBFo4,12111
 deepeval/metrics/toxicity/__init__.py,sha256=1lgt8BKxfBDd7bfSLu_5kMzmsr9b2_ahPK9oq5zLkMk,39
 deepeval/metrics/toxicity/schema.py,sha256=7uUdzXqTvIIz5nfahlllo_fzVRXg7UeMeXn7Hl32pKY,459
 deepeval/metrics/toxicity/template.py,sha256=zl4y4Tg9gXkxKJ8aXVwj0cJ94pvfYuP7MTeV3dvB5yQ,5045
@@ -370,7 +370,7 @@ deepeval/models/embedding_models/ollama_embedding_model.py,sha256=w3etdIdWvYfVIE
 deepeval/models/embedding_models/openai_embedding_model.py,sha256=Z1--e3CnNNmwryqmUMxBCaTURjtgKWHqADuUeCqFlSc,3545
 deepeval/models/hallucination_model.py,sha256=ABi978VKLE_jNHbDzM96kJ08EsZ5ZlvOlJHA_ptSkfQ,1003
 deepeval/models/llms/__init__.py,sha256=qmvv7wnmTDvys2uUTwQRo-_3DlFV3fGLiewPeQYRsAI,670
-deepeval/models/llms/amazon_bedrock_model.py,sha256=
+deepeval/models/llms/amazon_bedrock_model.py,sha256=3yiUUGU_d_YK7Usq8v5iqG3yHa5VnqeDOoCLG_p8rtc,5185
 deepeval/models/llms/anthropic_model.py,sha256=5gYRNkYUD7Zl3U0SibBG2YGCQsD6DdTsaBhqdaJlKIw,6072
 deepeval/models/llms/azure_model.py,sha256=dqINcfoJNqdd9zh5iTPwQ_ToGMOF7iH6YUB-UWRSOlc,10730
 deepeval/models/llms/deepseek_model.py,sha256=EqBJkKa7rXppCmlnIt_D-Z_r9fbsOUsOAVvN2jWA-Hk,6404
@@ -380,8 +380,8 @@ deepeval/models/llms/kimi_model.py,sha256=ldTefdSVitZYJJQ-_ZsP87iiT5iZ4QCVdfi-Yz
 deepeval/models/llms/litellm_model.py,sha256=iu4-_JCpd9LdEa-eCWseD2iLTA-r7OSgYGWQ0IxB4eA,11527
 deepeval/models/llms/local_model.py,sha256=hEyKVA6pkQm9dICUKsMNgjVI3w6gnyMdmBt_EylkWDk,4473
 deepeval/models/llms/ollama_model.py,sha256=xPO4d4jMY-cQAyHAcMuFvWS8JMWwCUbKP9CMi838Nuc,3307
-deepeval/models/llms/openai_model.py,sha256=
-deepeval/models/llms/utils.py,sha256=
+deepeval/models/llms/openai_model.py,sha256=mUvQ8a9FVk4lrdZyS_QRZTK4imufyaCNjZFPeqbc0AM,17167
+deepeval/models/llms/utils.py,sha256=gFM_8eIvdSwN_D4Yqp-j7PkfoiRn_bgu7tlCHol3A6c,1324
 deepeval/models/mlllms/__init__.py,sha256=19nN6kUB5XI0nUWUQX0aD9GBUMM8WWGvsDgKjuT4EF4,144
 deepeval/models/mlllms/gemini_model.py,sha256=7tHIWD4w_fBz3L7jkKWygn1QpBPk9nl2Kw-yb0Jc3PI,10167
 deepeval/models/mlllms/ollama_model.py,sha256=_YtYtw8oIMVVI-CFsDicsdeEJUPhw_9ArPxB_1olsJA,4798
@@ -404,8 +404,8 @@ deepeval/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
 deepeval/plugins/plugin.py,sha256=_dwsdx4Dg9DbXxK3f7zJY4QWTJQWc7QE1HmIg2Zjjag,1515
 deepeval/progress_context.py,sha256=ZSKpxrE9sdgt9G3REKnVeXAv7GJXHHVGgLynpG1Pudw,3557
 deepeval/prompt/__init__.py,sha256=M99QTWdxOfiNeySGCSqN873Q80PPxqRvjLq4_Mw-X1w,49
-deepeval/prompt/api.py,sha256=
-deepeval/prompt/prompt.py,sha256=
+deepeval/prompt/api.py,sha256=665mLKiq8irXWV8kM9P_qFJipdCYZUNQFwW8AkA3itM,1777
+deepeval/prompt/prompt.py,sha256=JjPm7rB-3rnTs8oEQT4EEwnqQqE8ZFNXebVngEOWsI4,23537
 deepeval/prompt/utils.py,sha256=Ermw9P-1-T5wQ5uYuj5yWgdj7pVB_JLw8D37Qvmh9ok,1938
 deepeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deepeval/red_teaming/README.md,sha256=BY5rAdpp3-sMMToEKwq0Nsd9ivkGDzPE16DeDb8GY7U,154
@@ -454,15 +454,15 @@ deepeval/tracing/offline_evals/thread.py,sha256=bcSGFcZJKnszArOLIlWvnCyt0zSmsd7X
 deepeval/tracing/offline_evals/trace.py,sha256=vTflaTKysKRiYvKA-Nx6PUJ3J6NrRLXiIdWieVcm90E,1868
 deepeval/tracing/otel/__init__.py,sha256=HQsaF5yLPwyW5qg8AOV81_nG_7pFHnatOTHi9Wx3HEk,88
 deepeval/tracing/otel/exporter.py,sha256=wPO1ITKpjueLOSNLO6nD2QL9LAd8Xcu6en8hRkB61Wo,28891
-deepeval/tracing/otel/utils.py,sha256=
+deepeval/tracing/otel/utils.py,sha256=yAXyPvTjax2HdLcvbVv9pyOVW4S7elIp3RLGuBTr_8o,15113
 deepeval/tracing/patchers.py,sha256=DAPNkhrDtoeyJIVeQDUMhTz-xGcXu00eqjQZmov8FiU,3096
 deepeval/tracing/perf_epoch_bridge.py,sha256=iyAPddB6Op7NpMtPHJ29lDm53Btz9yLaN6xSCfTRQm4,1825
 deepeval/tracing/tracing.py,sha256=xZEyuxdGY259nQaDkGp_qO7Avriv8hrf4L15ZfeMNV8,42728
 deepeval/tracing/types.py,sha256=l_utWKerNlE5H3mOKpeUJLsvpP3cMyjH7HRANNgTmSQ,5306
 deepeval/tracing/utils.py,sha256=SLnks8apGlrV6uVnvFVl2mWYABEkvXbPXnQvq3KaU_o,7943
 deepeval/utils.py,sha256=-_o3W892u7naX4Y7a8if4mP0Rtkgtapg6Krr1ZBpj0o,17197
-deepeval-3.6.2.dist-info/LICENSE.md,sha256=
-deepeval-3.6.2.dist-info/METADATA,sha256=
-deepeval-3.6.2.dist-info/WHEEL,sha256=
-deepeval-3.6.2.dist-info/entry_points.txt,sha256=
-deepeval-3.6.2.dist-info/RECORD,,
+deepeval-3.6.4.dist-info/LICENSE.md,sha256=0ATkuLv6QgsJTBODUHC5Rak_PArA6gv2t7inJzNTP38,11352
+deepeval-3.6.4.dist-info/METADATA,sha256=oZQnVgn7bI4TUmgA7W_fsoflHL4RuT23O7oBkoo5XcM,18754
+deepeval-3.6.4.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
+deepeval-3.6.4.dist-info/entry_points.txt,sha256=fVr8UphXTfJe9I2rObmUtfU3gkSrYeM0pLy-NbJYg10,94
+deepeval-3.6.4.dist-info/RECORD,,

{deepeval-3.6.2.dist-info → deepeval-3.6.4.dist-info}/LICENSE.md
File without changes

{deepeval-3.6.2.dist-info → deepeval-3.6.4.dist-info}/WHEEL
File without changes

{deepeval-3.6.2.dist-info → deepeval-3.6.4.dist-info}/entry_points.txt
File without changes