deepeval 3.5.0__py3-none-any.whl → 3.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deepeval/_version.py CHANGED
@@ -1 +1 @@
- __version__: str = "3.5.0"
+ __version__: str = "3.5.1"
deepeval/confident/api.py CHANGED
@@ -89,7 +89,9 @@ class Endpoints(Enum):
  TEST_RUN_ENDPOINT = "/v1/test-run"
  TRACES_ENDPOINT = "/v1/traces"
  ANNOTATIONS_ENDPOINT = "/v1/annotations"
+ PROMPTS_VERSION_ID_ENDPOINT = "/v1/prompts/:alias/versions/:versionId"
  PROMPTS_ENDPOINT = "/v1/prompts"
+ PROMPTS_VERSIONS_ENDPOINT = "/v1/prompts/:alias/versions"
  SIMULATE_ENDPOINT = "/v1/simulate"
  EVALUATE_ENDPOINT = "/v1/evaluate"
deepeval/integrations/langchain/callback.py CHANGED
@@ -9,6 +9,7 @@ from deepeval.tracing.types import (
  from deepeval.metrics import BaseMetric, TaskCompletionMetric
  from deepeval.test_case import LLMTestCase
  from deepeval.test_run import global_test_run_manager
+ import uuid

  try:
  from langchain_core.callbacks.base import BaseCallbackHandler
@@ -81,6 +82,26 @@ class CallbackHandler(BaseCallbackHandler):
  )
  super().__init__()

+ def on_llm_new_token(
+ self,
+ token: str,
+ *,
+ chunk,
+ run_id: UUID,
+ parent_run_id: Optional[UUID] = None,
+ tags: Optional[list[str]] = None,
+ **kwargs: Any,
+ ):
+ llm_span: Optional[LlmSpan] = trace_manager.get_span_by_uuid(
+ str(run_id)
+ )
+ if llm_span is None:
+ return
+ if llm_span.token_intervals is None:
+ llm_span.token_intervals = {perf_counter(): token}
+ else:
+ llm_span.token_intervals[perf_counter()] = token
+
  def check_active_trace_id(self):
  if self.active_trace_id is None:
  self.active_trace_id = trace_manager.start_new_trace().uuid
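The new on_llm_new_token hook above records each streamed token on the active LlmSpan, keyed by a perf_counter() timestamp. A minimal usage sketch, assuming langchain-openai is installed, Confident AI credentials are configured, and CallbackHandler needs no required constructor arguments (model name and prompt are placeholders):

    from langchain_openai import ChatOpenAI
    from deepeval.integrations.langchain.callback import CallbackHandler

    llm = ChatOpenAI(model="gpt-4o-mini")
    # Streaming fires on_llm_new_token once per chunk, so the traced LLM span
    # accumulates {perf_counter_timestamp: token} entries in token_intervals.
    for chunk in llm.stream("Say hello", config={"callbacks": [CallbackHandler()]}):
        print(chunk.content, end="", flush=True)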
deepeval/metrics/__init__.py CHANGED
@@ -69,7 +69,7 @@ __all__ = [
  "ConversationalGEval",
  "DAGMetric",
  "DeepAcyclicGraph",
- "ConversationalDAGMetric"
+ "ConversationalDAGMetric",
  # RAG metrics
  "AnswerRelevancyMetric",
  "FaithfulnessMetric",
deepeval/metrics/answer_relevancy/template.py CHANGED
@@ -34,62 +34,37 @@ JSON:
  @staticmethod
  def generate_verdicts(input: str, statements: str):
  return f"""For the provided list of statements, determine whether each statement is relevant to address the input.
- Please generate a list of JSON with two keys: `verdict` and `reason`.
- The 'verdict' key should STRICTLY be either a 'yes', 'idk' or 'no'. Answer 'yes' if the statement is relevant to addressing the original input, 'no' if the statement is irrelevant, and 'idk' if it is ambiguous (eg., not directly relevant but could be used as a supporting point to address the input).
- The 'reason' is the reason for the verdict.
- Provide a 'reason' ONLY if the answer is 'no' or 'idk'.
- The provided statements are statements made in the actual output.
+ Generate JSON objects with 'verdict' and 'reason' fields.
+ The 'verdict' should be 'yes' (relevant), 'no' (irrelevant), or 'idk' (ambiguous/supporting information).
+ Provide 'reason' ONLY for 'no' or 'idk' verdicts.
+ The statements are from an AI's actual output.

  **
  IMPORTANT: Please make sure to only return in valid and parseable JSON format, with the 'verdicts' key mapping to a list of JSON objects. Ensure all strings are closed appropriately. Repair any invalid JSON before you output it.
- Example input:
- What features does the new laptop have?

- Example:
- Example statements:
- [
- "The new laptop model has a high-resolution Retina display.",
- "It includes a fast-charging battery with up to 12 hours of usage.",
- "Security features include fingerprint authentication and an encrypted SSD.",
- "Every purchase comes with a one-year warranty.",
- "24/7 customer support is included.",
- "Pineapples taste great on pizza.",
- "The laptop is a Dell XPS 13."
- ]
-
- Example JSON:
+ Expected JSON format:
  {{
  "verdicts": [
  {{
  "verdict": "yes"
  }},
- {{
- "verdict": "yes"
- }},
- {{
- "verdict": "yes"
- }},
- {{
- "verdict": "no",
- "reason": "A one-year warranty is a purchase benefit, not a feature of the laptop itself."
- }},
  {{
  "verdict": "no",
- "reason": "Customer support is a service, not a feature of the laptop."
- }},
- {{
- "verdict": "no",
- "reason": "The statement about pineapples on pizza is completely irrelevant to the input, which asks about laptop features."
+ "reason": <explanation_for_irrelevance>
  }},
  {{
  "verdict": "idk",
- "reason": "The statement about the laptop being a Dell XPS 13 is not directly relevant to the input, but could be used as a supporting point to address the input."
+ "reason": <explanation_for_ambiguity>
  }}
  ]
  }}
- ===== END OF EXAMPLE ======

- Since you are going to generate a verdict for each statement, the number of 'verdicts' SHOULD BE STRICTLY EQUAL to the number of `statements`.
+ Generate ONE verdict per statement - number of 'verdicts' MUST equal number of statements.
+ 'verdict' must be STRICTLY 'yes', 'no', or 'idk':
+ - 'yes': statement is relevant to addressing the input
+ - 'no': statement is irrelevant to the input
+ - 'idk': statement is ambiguous (not directly relevant but could be supporting information)
+ Provide 'reason' ONLY for 'no' or 'idk' verdicts.
  **

  Input:
deepeval/metrics/faithfulness/template.py CHANGED
@@ -76,42 +76,31 @@ The 'verdict' key should STRICTLY be either 'yes', 'no', or 'idk', which states
  Provide a 'reason' ONLY if the answer is 'no' or 'idk'.
  The provided claim is drawn from the actual output. Try to provide a correction in the reason using the facts in the retrieval context.

- **
- IMPORTANT: Please make sure to only return in JSON format, with the 'verdicts' key as a list of JSON objects.
- Example retrieval contexts: "Einstein won the Nobel Prize for his discovery of the photoelectric effect. Einstein won the Nobel Prize in 1968. Einstein is a German Scientist."
- Example claims: ["Barack Obama is a caucasian male.", "Zurich is a city in London", "Einstein won the Nobel Prize for the discovery of the photoelectric effect which may have contributed to his fame.", "Einstein won the Nobel Prize in 1969 for his discovery of the photoelectric effect.", "Einstein was a German chef."]
-
- Example:
+ Expected JSON format:
  {{
  "verdicts": [
- {{
- "verdict": "idk",
- "reason": "The claim about Barack Obama is although incorrect, it is not directly addressed in the retrieval context, and so poses no contradiction."
- }},
- {{
- "verdict": "idk",
- "reason": "The claim about Zurich being a city in London is incorrect but does not pose a contradiction to the retrieval context."
- }},
  {{
  "verdict": "yes"
  }},
  {{
  "verdict": "no",
- "reason": "The actual output claims Einstein won the Nobel Prize in 1969, which is untrue as the retrieval context states it is 1968 instead. This contradicts the retrieval context."
+ "reason": <explanation_for_contradiction>
  }},
  {{
- "verdict": "no",
- "reason": "The actual output claims Einstein is a German chef, which is not correct as the retrieval context states he was a German scientist instead. This contradicts the retrieval context."
- }},
+ "verdict": "idk",
+ "reason": <explanation_for_uncertainty>
+ }}
  ]
  }}
- ===== END OF EXAMPLE ======

- The length of 'verdicts' SHOULD BE STRICTLY EQUAL to that of claims.
- You DON'T have to provide a reason if the answer is 'yes'.
- ONLY provide a 'no' answer if the retrieval context DIRECTLY CONTRADICTS the claims. YOU SHOULD NEVER USE YOUR PRIOR KNOWLEDGE IN YOUR JUDGEMENT.
- Claims made using vague, suggestive, speculative language such as 'may have', 'possibility due to', does NOT count as a contradiction.
- Claims that are not backed up by the retrieval context or are not mentioned in it MUST be answered 'idk'.
+ Generate ONE verdict per claim - length of 'verdicts' MUST equal number of claims.
+ No 'reason' needed for 'yes' verdicts.
+ Only use 'no' if retrieval context DIRECTLY CONTRADICTS the claim - never use prior knowledge.
+ Use 'idk' for claims not backed up by context OR factually incorrect but non-contradictory - do not assume your knowledge.
+ Vague/speculative language in claims (e.g. 'may have', 'possibility') does NOT count as contradiction.
+
+ **
+ IMPORTANT: Please make sure to only return in JSON format, with the 'verdicts' key as a list of JSON objects.
  **

  Retrieval Contexts:
@@ -128,13 +117,14 @@ JSON:
  return f"""Below is a list of Contradictions. It is a list of strings explaining why the 'actual output' does not align with the information presented in the 'retrieval context'. Contradictions happen in the 'actual output', NOT the 'retrieval context'.
  Given the faithfulness score, which is a 0-1 score indicating how faithful the `actual output` is to the retrieval context (higher the better), CONCISELY summarize the contradictions to justify the score.

- **
- IMPORTANT: Please make sure to only return in JSON format, with the 'reason' key providing the reason.
- Example JSON:
+ Expected JSON format:
  {{
  "reason": "The score is <faithfulness_score> because <your_reason>."
  }}

+ **
+ IMPORTANT: Please make sure to only return in JSON format, with the 'reason' key providing the reason.
+
  If there are no contradictions, just say something positive with an upbeat encouraging tone (but don't overdo it otherwise it gets annoying).
  Your reason MUST use information in `contradiction` in your reason.
  Be sure in your reason, as if you know what the actual output is from the contradictions.
deepeval/prompt/api.py CHANGED
@@ -1,4 +1,4 @@
- from pydantic import BaseModel, Field
+ from pydantic import BaseModel, Field, AliasChoices
  from enum import Enum
  from typing import List, Optional

@@ -19,9 +19,28 @@ class PromptType(Enum):
  TEXT = "TEXT"
  LIST = "LIST"

+ class PromptVersion(BaseModel):
+ id: str
+ version: str
+ commit_message: str = Field(
+ serialization_alias="commitMessage",
+ validation_alias=AliasChoices("commit_message", "commitMessage")
+ )
+
+ class PromptVersionsHttpResponse(BaseModel):
+ text_versions: Optional[List[PromptVersion]] = Field(
+ None,
+ serialization_alias="textVersions",
+ validation_alias=AliasChoices("text_versions", "textVersions")
+ )
+ messages_versions: Optional[List[PromptVersion]] = Field(
+ None,
+ serialization_alias="messagesVersions",
+ validation_alias=AliasChoices("messages_versions", "messagesVersions")
+ )

  class PromptHttpResponse(BaseModel):
- promptVersionId: str
+ id: str
  text: Optional[str] = None
  messages: Optional[List[PromptMessage]] = None
  interpolation_type: PromptInterpolationType = Field(
@@ -29,7 +48,6 @@ class PromptHttpResponse(BaseModel):
  )
  type: PromptType

-
  class PromptPushRequest(BaseModel):
  alias: str
  text: Optional[str] = None
@@ -44,4 +62,4 @@ class PromptPushRequest(BaseModel):

  class PromptApi(BaseModel):
  id: str
- type: PromptType
+ type: PromptType
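A rough sketch of how the new version-listing models might be parsed; the payload below is invented and only illustrates the camelCase aliases the validators accept (it would come from the /v1/prompts/:alias/versions endpoint added above):

    from deepeval.prompt.api import PromptVersionsHttpResponse

    payload = {  # hypothetical response body
        "textVersions": [
            {"id": "ver_1", "version": "00.00.01", "commitMessage": "initial draft"},
            {"id": "ver_2", "version": "00.00.02", "commitMessage": "tighten wording"},
        ],
        "messagesVersions": None,
    }
    response = PromptVersionsHttpResponse(**payload)
    # Either text_versions or messages_versions is populated, mirroring _get_versions().
    latest = (response.text_versions or response.messages_versions or [])[-1]
    print(latest.version, latest.commit_message)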
deepeval/prompt/prompt.py CHANGED
@@ -1,11 +1,12 @@
  from enum import Enum
- from typing import Optional, List
+ from typing import Optional, List, Dict
  from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn
  from rich.console import Console
  import time
  import json
  import os
  from pydantic import BaseModel
+ import asyncio

  from deepeval.prompt.api import (
  PromptHttpResponse,
@@ -13,11 +14,12 @@ from deepeval.prompt.api import (
  PromptType,
  PromptInterpolationType,
  PromptPushRequest,
+ PromptVersionsHttpResponse,
  )
  from deepeval.prompt.utils import interpolate_text
  from deepeval.confident.api import Api, Endpoints, HttpMethods
-
  from deepeval.constants import HIDDEN_DIR
+ from deepeval.utils import get_or_create_event_loop

  CACHE_FILE_NAME = f"{HIDDEN_DIR}/.deepeval-prompt-cache.json"

@@ -63,7 +65,23 @@ class Prompt:
  self.alias = alias
  self._text_template = template
  self._messages_template = messages_template
- self.version = None
+ self._version = None
+ self._polling_tasks: Dict[str, asyncio.Task] = {}
+ self._refresh_map: Dict[str, int] = {}
+
+ @property
+ def version(self):
+ if self._version is not None and self._version != "latest":
+ return self._version
+ versions = self._get_versions()
+ if len(versions) == 0:
+ return "latest"
+ else:
+ return versions[-1].version
+
+ @version.setter
+ def version(self, value):
+ self._version = value

  def interpolate(self, **kwargs):
  if self._type == PromptType.TEXT:
@@ -93,6 +111,20 @@ class Prompt:
  return interpolated_messages
  else:
  raise ValueError(f"Unsupported prompt type: {self._type}")
+
+ def _get_versions(self) -> List:
+ if self.alias is None:
+ raise ValueError(
+ "Prompt alias is not set. Please set an alias to continue."
+ )
+ api = Api()
+ data, _ = api.send_request(
+ method=HttpMethods.GET,
+ endpoint=Endpoints.PROMPTS_VERSIONS_ENDPOINT,
+ url_params={"alias": self.alias},
+ )
+ versions = PromptVersionsHttpResponse(**data)
+ return versions.text_versions or versions.messages_versions or []

  def _read_from_cache(
  self, alias: str, version: Optional[str] = None
@@ -123,8 +155,16 @@ class Prompt:
  except Exception as e:
  raise Exception(f"Error reading Prompt cache from disk: {e}")

- def _write_to_cache(self):
- if not self.alias or not self.version:
+ def _write_to_cache(
+ self,
+ version: Optional[str] = None,
+ text_template: Optional[str] = None,
+ messages_template: Optional[List[PromptMessage]] = None,
+ prompt_version_id: Optional[str] = None,
+ type: Optional[PromptType] = None,
+ interpolation_type: Optional[PromptInterpolationType] = None,
+ ):
+ if not self.alias or not version:
  return

  cache_data = {}
@@ -140,14 +180,14 @@ class Prompt:
  cache_data[self.alias] = {}

  # Cache the prompt
- cache_data[self.alias][self.version] = {
+ cache_data[self.alias][version] = {
  "alias": self.alias,
- "version": self.version,
- "template": self._text_template,
- "messages_template": self._messages_template,
- "prompt_version_id": self._prompt_version_id,
- "type": self._type,
- "interpolation_type": self._interpolation_type,
+ "version": version,
+ "template": text_template,
+ "messages_template": messages_template,
+ "prompt_version_id": prompt_version_id,
+ "type": type,
+ "interpolation_type": interpolation_type,
  }

  # Ensure directory exists
@@ -163,12 +203,22 @@ class Prompt:
  fallback_to_cache: bool = True,
  write_to_cache: bool = True,
  default_to_cache: bool = True,
+ refresh: Optional[int] = 60,
  ):
+ if refresh:
+ default_to_cache = True
+ write_to_cache = False
  if self.alias is None:
  raise TypeError(
  "Unable to pull prompt from Confident AI when no alias is provided."
  )

+ # Manage background prompt polling
+ loop = get_or_create_event_loop()
+ loop.run_until_complete(
+ self.create_polling_task(version, refresh)
+ )
+
  if default_to_cache:
  try:
  cached_prompt = self._read_from_cache(self.alias, version)
@@ -200,11 +250,11 @@ class Prompt:
  try:
  data, _ = api.send_request(
  method=HttpMethods.GET,
- endpoint=Endpoints.PROMPTS_ENDPOINT,
- params={"alias": self.alias, "version": version},
+ endpoint=Endpoints.PROMPTS_VERSION_ID_ENDPOINT,
+ url_params={"alias": self.alias, "versionId": version or "latest"},
  )
  response = PromptHttpResponse(
- promptVersionId=data["promptVersionId"],
+ id=data["id"],
  text=data.get("text", None),
  messages=data.get("messages", None),
  type=data["type"],
@@ -243,7 +293,7 @@ class Prompt:
  self.version = version or "latest"
  self._text_template = response.text
  self._messages_template = response.messages
- self._prompt_version_id = response.promptVersionId
+ self._prompt_version_id = response.id
  self._type = response.type
  self._interpolation_type = response.interpolation_type

@@ -254,7 +304,14 @@ class Prompt:
  description=f"{progress.tasks[task_id].description}[rgb(25,227,160)]Done! ({time_taken}s)",
  )
  if write_to_cache:
- self._write_to_cache()
+ self._write_to_cache(
+ version=version or "latest",
+ text_template=response.text,
+ messages_template=response.messages,
+ prompt_version_id=response.id,
+ type=response.type,
+ interpolation_type=response.interpolation_type,
+ )

  def push(
  self,
@@ -300,3 +357,60 @@ class Prompt:
  "✅ Prompt successfully pushed to Confident AI! View at "
  f"[link={link}]{link}[/link]"
  )
+
+ ############################################
+ ### Polling
+ ############################################
+
+ async def create_polling_task(
+ self,
+ version: Optional[str],
+ refresh: Optional[int] = 60,
+ ):
+ if version is None:
+ return
+
+ # If polling task doesn't exist, start it
+ polling_task: Optional[asyncio.Task] = self._polling_tasks.get(version)
+ if refresh:
+ self._refresh_map[version] = refresh
+ if not polling_task:
+ self._polling_tasks[version] = asyncio.create_task(
+ self.poll(version)
+ )
+
+ # If invalid `refresh`, stop the task
+ else:
+ if polling_task:
+ polling_task.cancel()
+ self._polling_tasks.pop(version)
+ self._refresh_map.pop(version)
+
+ async def poll(self, version: Optional[str] = None):
+ api = Api()
+ while True:
+ try:
+ data, _ = api.send_request(
+ method=HttpMethods.GET,
+ endpoint=Endpoints.PROMPTS_VERSION_ID_ENDPOINT,
+ url_params={"alias": self.alias, "versionId": version or "latest"},
+ )
+ response = PromptHttpResponse(
+ id=data["id"],
+ text=data.get("text", None),
+ messages=data.get("messages", None),
+ type=data["type"],
+ interpolation_type=data["interpolationType"],
+ )
+ self._write_to_cache(
+ version=version or "latest",
+ text_template=response.text,
+ messages_template=response.messages,
+ prompt_version_id=response.id,
+ type=response.type,
+ interpolation_type=response.interpolation_type,
+ )
+ except Exception as e:
+ pass
+
+ await asyncio.sleep(self._refresh_map[version])
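A minimal usage sketch of the new refresh behaviour in Prompt.pull(); the alias and version values are placeholders and a configured Confident AI API key is assumed:

    from deepeval.prompt.prompt import Prompt

    prompt = Prompt(alias="my-prompt-alias")
    # refresh defaults to 60: pull() prefers the local cache and schedules a
    # background poll that re-fetches and re-caches this version every 60 seconds.
    # Passing a falsy refresh (e.g. refresh=None) cancels an existing polling task.
    prompt.pull(version="00.00.01", refresh=60)
    print(prompt.version)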
deepeval/synthesizer/synthesizer.py CHANGED
@@ -361,7 +361,7 @@ class Synthesizer:
  progress if _progress is None else nullcontext()
  ):

- for i, context in enumerate(contexts):
+ for context_index, context in enumerate(contexts):
  # Calculate pbar lengths
  should_style = (
  self.styling_config.input_format
@@ -381,7 +381,7 @@ class Synthesizer:
  # Add pbars
  pbar_generate_goldens_id = add_pbar(
  progress,
- f"\t⚡ Generating goldens from context #{i}",
+ f"\t⚡ Generating goldens from context #{context_index}",
  total=1 + max_goldens_per_context,
  )
  pbar_generate_inputs_id = add_pbar(
@@ -421,7 +421,9 @@ class Synthesizer:
  progress, pbar_generate_goldens_id, remove=False
  )

- for j, data in enumerate(qualified_synthetic_inputs):
+ for input_index, data in enumerate(
+ qualified_synthetic_inputs
+ ):
  # Evolve input
  evolved_input, evolutions_used = self._evolve_input(
  input=data.input,
@@ -429,7 +431,9 @@ class Synthesizer:
  num_evolutions=self.evolution_config.num_evolutions,
  evolutions=self.evolution_config.evolutions,
  progress=progress,
- pbar_evolve_input_id=pbar_evolve_input_ids[j],
+ pbar_evolve_input_id=pbar_evolve_input_ids[
+ input_index
+ ],
  remove_pbar=False,
  )

@@ -441,7 +445,9 @@ class Synthesizer:
  task=self.styling_config.task,
  )
  update_pbar(
- progress, pbar_evolve_input_ids[j], remove=False
+ progress,
+ pbar_evolve_input_ids[input_index],
+ remove=False,
  )
  res: SyntheticData = self._generate_schema(
  prompt,
@@ -455,15 +461,15 @@ class Synthesizer:
  input=evolved_input,
  context=context,
  source_file=(
- source_files[i]
+ source_files[context_index]
  if source_files is not None
  else None
  ),
  additional_metadata={
  "evolutions": evolutions_used,
- "synthetic_input_quality": scores[j],
+ "synthetic_input_quality": scores[input_index],
  "context_quality": (
- _context_scores[i]
+ _context_scores[context_index]
  if _context_scores is not None
  else None
  ),
@@ -480,7 +486,9 @@ class Synthesizer:
  res = self._generate(prompt)
  golden.expected_output = res
  update_pbar(
- progress, pbar_evolve_input_ids[j], remove=False
+ progress,
+ pbar_evolve_input_ids[input_index],
+ remove=False,
  )

  goldens.append(golden)
deepeval/tracing/api.py CHANGED
@@ -86,6 +86,9 @@ class BaseApiSpan(BaseModel):
  cost_per_output_token: Optional[float] = Field(
  None, alias="costPerOutputToken"
  )
+ token_intervals: Optional[Dict[str, str]] = Field(
+ None, alias="tokenIntervals"
+ )

  ## evals
  metric_collection: Optional[str] = Field(None, alias="metricCollection")
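On the API model both keys and values are strings; after the conversion in TraceManager (see the tracing.py hunk further below), the payload behind the "tokenIntervals" alias would look roughly like this sketch, with invented timestamps and tokens:

    token_intervals = {
        "2025-01-01T12:00:00.000125Z": "Hel",
        "2025-01-01T12:00:00.045871Z": "lo",
    }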
deepeval/tracing/context.py CHANGED
@@ -4,7 +4,6 @@ from contextvars import ContextVar
  from deepeval.tracing.types import BaseSpan, Trace
  from deepeval.test_case.llm_test_case import ToolCall, LLMTestCase
  from deepeval.tracing.types import LlmSpan, RetrieverSpan
- from deepeval.metrics import BaseMetric
  from deepeval.prompt.prompt import Prompt

  current_span_context: ContextVar[Optional[BaseSpan]] = ContextVar(
@@ -117,6 +116,7 @@ def update_llm_span(
  output_token_count: Optional[float] = None,
  cost_per_input_token: Optional[float] = None,
  cost_per_output_token: Optional[float] = None,
+ token_intervals: Optional[Dict[float, str]] = None,
  prompt: Optional[Prompt] = None,
  ):
  current_span = current_span_context.get()
@@ -132,6 +132,8 @@
  current_span.cost_per_input_token = cost_per_input_token
  if cost_per_output_token:
  current_span.cost_per_output_token = cost_per_output_token
+ if token_intervals:
+ current_span.token_intervals = token_intervals
  if prompt:
  current_span.prompt = prompt
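A hedged sketch of passing token_intervals through update_llm_span; it assumes the function is imported from deepeval.tracing.context (the file shown above) and is called while an LLM span is active in the current context:

    from time import perf_counter
    from deepeval.tracing.context import update_llm_span

    def record_stream(token_iterable):
        intervals = {}
        for token in token_iterable:           # e.g. tokens from any streaming client
            intervals[perf_counter()] = token  # float perf_counter key -> token string
        update_llm_span(token_intervals=intervals)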
 
deepeval/tracing/tracing.py CHANGED
@@ -114,7 +114,7 @@ class TraceManager:
  self._print_trace_status(
  message=f"WARNING: Exiting with {queue_size + in_flight} abaonded trace(s).",
  trace_worker_status=TraceWorkerStatus.WARNING,
- description=f"Set {CONFIDENT_TRACE_FLUSH}=YES as an environment variable to flush remaining traces to Confident AI.",
+ description=f"Set {CONFIDENT_TRACE_FLUSH}=1 as an environment variable to flush remaining traces to Confident AI.",
  )

  def mask(self, data: Any):
@@ -314,7 +314,7 @@ class TraceManager:
  env_text,
  message + ":",
  description,
- f"\nTo disable dev logging, set {CONFIDENT_TRACE_VERBOSE}=NO as an environment variable.",
+ f"\nTo disable dev logging, set {CONFIDENT_TRACE_VERBOSE}=0 as an environment variable.",
  )
  else:
  console.print(message_prefix, env_text, message)
@@ -717,6 +717,16 @@ class TraceManager:
  api_span.input_token_count = span.input_token_count
  api_span.output_token_count = span.output_token_count

+ processed_token_intervals = {}
+ if span.token_intervals:
+ for key, value in span.token_intervals.items():
+ time = to_zod_compatible_iso(
+ perf_counter_to_datetime(key),
+ microsecond_precision=True,
+ )
+ processed_token_intervals[time] = value
+ api_span.token_intervals = processed_token_intervals
+
  return api_span
deepeval/tracing/types.py CHANGED
@@ -102,6 +102,9 @@ class LlmSpan(BaseSpan):
  cost_per_output_token: Optional[float] = Field(
  None, serialization_alias="costPerOutputToken"
  )
+ token_intervals: Optional[Dict[float, str]] = Field(
+ None, serialization_alias="tokenTimes"
+ )

  # for serializing `prompt`
  model_config = {"arbitrary_types_allowed": True}
deepeval/tracing/utils.py CHANGED
@@ -100,10 +100,14 @@ def make_json_serializable(obj):
  return _serialize(obj)


- def to_zod_compatible_iso(dt: datetime) -> str:
+ def to_zod_compatible_iso(
+ dt: datetime, microsecond_precision: bool = False
+ ) -> str:
  return (
  dt.astimezone(timezone.utc)
- .isoformat(timespec="milliseconds")
+ .isoformat(
+ timespec="microseconds" if microsecond_precision else "milliseconds"
+ )
  .replace("+00:00", "Z")
  )
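A quick sketch of the precision difference (import path taken from the file above):

    from datetime import datetime, timezone
    from deepeval.tracing.utils import to_zod_compatible_iso

    dt = datetime(2025, 1, 1, 12, 0, 0, 123456, tzinfo=timezone.utc)
    print(to_zod_compatible_iso(dt))                              # 2025-01-01T12:00:00.123Z
    print(to_zod_compatible_iso(dt, microsecond_precision=True))  # 2025-01-01T12:00:00.123456Z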
 
deepeval-3.5.0.dist-info/METADATA → deepeval-3.5.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: deepeval
- Version: 3.5.0
+ Version: 3.5.1
  Summary: The LLM Evaluation Framework
  Home-page: https://github.com/confident-ai/deepeval
  License: Apache-2.0
deepeval-3.5.0.dist-info/RECORD → deepeval-3.5.1.dist-info/RECORD CHANGED
@@ -1,5 +1,5 @@
  deepeval/__init__.py,sha256=6fsb813LD_jNhqR-xZnSdE5E-KsBbC3tc4oIg5ZMgTw,2115
- deepeval/_version.py,sha256=xgoMNdDXsY3c4GfV1_DVK-xGdMOp5KCDaKln5j0PJdY,27
+ deepeval/_version.py,sha256=4-DIaf0_guINnwTWGKRHVcep723FM_T7p_K6jccjok0,27
  deepeval/annotation/__init__.py,sha256=ZFhUVNNuH_YgQSZJ-m5E9iUb9TkAkEV33a6ouMDZ8EI,111
  deepeval/annotation/annotation.py,sha256=3j3-syeJepAcEj3u3e4T_BeRDzNr7yXGDIoNQGMKpwQ,2298
  deepeval/annotation/api.py,sha256=EYN33ACVzVxsFleRYm60KB4Exvff3rPJKt1VBuuX970,2147
@@ -138,7 +138,7 @@ deepeval/cli/test.py,sha256=kSIFMRTAfVzBJ4OitwvT829-ylV7UzPMP57P2DePS-Q,5482
  deepeval/cli/types.py,sha256=_7KdthstHNc-JKCWrfpDQCf_j8h9PMxh0qJCHmVXJr0,310
  deepeval/cli/utils.py,sha256=F4-yuONzk4ojDoSLjI9RYERB7HOD412iZ2lNlSCq4wk,5601
  deepeval/confident/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
- deepeval/confident/api.py,sha256=-2i3IBLtj5bUIImwOF6ltGVR3ZyViIbIC38XxwWvf54,8318
+ deepeval/confident/api.py,sha256=bOC71TaVAEgoXFtJ9yMo0-atmUUdBuvaclMGczMcR6o,8455
  deepeval/confident/types.py,sha256=-slFhDof_1maMgpLxqDRZv6kz6ZVY2hP_0uj_aveJKU,533
  deepeval/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  deepeval/config/settings.py,sha256=e7sk6_3I14hG457e75DoJd9Ojo3rOkpBZzsMYlj4gKQ,18139
@@ -172,7 +172,7 @@ deepeval/integrations/hugging_face/rich_manager.py,sha256=WvFtPGpPmGeg2Ftsnojga6
  deepeval/integrations/hugging_face/tests/test_callbacks.py,sha256=88Wyg-aDaXujj9jHeGdFF3ITSl2-y7eaJGWgSyvvDi8,4607
  deepeval/integrations/hugging_face/utils.py,sha256=HUKdQcTIb76Ct69AS737oPxmlVxk5fw2UbT2pLn-o8k,1817
  deepeval/integrations/langchain/__init__.py,sha256=EJz0UvoLjBG5cftOJNJQ5qLawwHHRnSQLgBu_SaqZ1Q,94
- deepeval/integrations/langchain/callback.py,sha256=Cp3t0zPwXYnj5Hs3PhYzFTLmQF7cc3S2eH2vnHwiT8k,15876
+ deepeval/integrations/langchain/callback.py,sha256=hps3eq8rYZIvxbGtCyAxmb0VTTgAX1HqDBxQLGxZYho,16450
  deepeval/integrations/langchain/patch.py,sha256=yWkdGMzRVggBcPFx__HRlUoYtASLh7Vef6mqOIZ9LDY,992
  deepeval/integrations/langchain/utils.py,sha256=gSs4VOIzftVS9VLbQSs94R-Pi7D6CGFt84SzccwOsWg,3209
  deepeval/integrations/llama_index/__init__.py,sha256=zBwUFQXDp6QFtp1cfANy8ucV08rjc93nyxM9o9hWjT0,216
@@ -183,11 +183,11 @@ deepeval/integrations/pydantic_ai/__init__.py,sha256=36fBKBLRo1y5jFlj0Y4xhDJsiq4
  deepeval/integrations/pydantic_ai/otel.py,sha256=2DpO3RapdztXPlT9BWhQfF4dJDMyp2X7YvuplJ0SwC8,1661
  deepeval/integrations/pydantic_ai/patcher.py,sha256=wszU2YROZAQovyz1ZNRvTtsuJ5By_x4SF6yjtmItcNk,12210
  deepeval/key_handler.py,sha256=damdQEBLGy4IVk5DR5-E3blIZdLbcMtyeGAFn_4_SG4,6505
- deepeval/metrics/__init__.py,sha256=xofaK_bJq0QCSerSWYjHYRXXch9YQwZHxIfVAv1G7fo,4012
+ deepeval/metrics/__init__.py,sha256=nvO0Wv2JROjK1I9MDNIFUJlrRAZI2C0xbGYSBZK5q4g,4013
  deepeval/metrics/answer_relevancy/__init__.py,sha256=WbZUpoSg2GQoqJ4VIRirVVQ1JDx5xwT-RskwqNKfWGM,46
  deepeval/metrics/answer_relevancy/answer_relevancy.py,sha256=vlc7BzUAtYVW62d5Qa-fIHSLOX239KFwCE7fCGP8jGE,10935
  deepeval/metrics/answer_relevancy/schema.py,sha256=N8wIBh4qwk4-BZOEyPJM-MB2_0dbkqXHv0aCfsIkROo,405
- deepeval/metrics/answer_relevancy/template.py,sha256=vU6yAsiCYtvx5S1g74WeEdJmuGvd2ZtwDDqM5-jfYkM,5174
+ deepeval/metrics/answer_relevancy/template.py,sha256=InlbD3nufOFmohX3c7tnBwxDbcXwYbc57nPjIiW0Gmc,4030
  deepeval/metrics/arena_g_eval/__init__.py,sha256=pVDIsWD_DLumOLegJrVSozcWwzsaxJXE5cIN7KxCzws,37
  deepeval/metrics/arena_g_eval/arena_g_eval.py,sha256=B4Gjct3w5VGPxmumBblFVajdUIdWJTNR0hvMuhgIFg0,11661
  deepeval/metrics/arena_g_eval/schema.py,sha256=3wipvUpZNO0O4QuWFy1LaLenfTYxLKldCERmP3sVtYI,288
@@ -236,7 +236,7 @@ deepeval/metrics/dag/utils.py,sha256=66D88fpjIUdVwZvYV8a1L9TlX1wvbCVuE6Y8BFTbpkE
  deepeval/metrics/faithfulness/__init__.py,sha256=RffAtTOSdtWO1gHVMnPI-imJahf3JENOoJRiNw-Xv4g,43
  deepeval/metrics/faithfulness/faithfulness.py,sha256=bYVhHI7Tr7xH0x-7F2LijxRuCCEtLOnXLzncvJLVv60,12887
  deepeval/metrics/faithfulness/schema.py,sha256=2dU9dwwmqpGJcWvY2webERWIfH_tn02xgLghHkAY_eM,437
- deepeval/metrics/faithfulness/template.py,sha256=q5NvVBcUEZgyMy_1zHFGtDNU7PoREFJGOkVQbZf8r-g,7117
+ deepeval/metrics/faithfulness/template.py,sha256=RuZ0LFm4BjZ8lhVrKPgU3ecHszwkF0fe5-BxAkaP5AA,5839
  deepeval/metrics/g_eval/__init__.py,sha256=HAhsQFVq9LIpZXPN00Jc_WrMXrh47NIT86VnUpWM4_4,102
  deepeval/metrics/g_eval/g_eval.py,sha256=JI3rTaEClYgiL9oLaVFh7sunqGoXI7qBeBgi9RkSwDs,14327
  deepeval/metrics/g_eval/schema.py,sha256=V629txuDrr_2IEKEsgJVYYZb_pkdfcltQV9ZjvxK5co,287
@@ -402,8 +402,8 @@ deepeval/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
  deepeval/plugins/plugin.py,sha256=_dwsdx4Dg9DbXxK3f7zJY4QWTJQWc7QE1HmIg2Zjjag,1515
  deepeval/progress_context.py,sha256=ZSKpxrE9sdgt9G3REKnVeXAv7GJXHHVGgLynpG1Pudw,3557
  deepeval/prompt/__init__.py,sha256=M99QTWdxOfiNeySGCSqN873Q80PPxqRvjLq4_Mw-X1w,49
- deepeval/prompt/api.py,sha256=VxRxnnCPiTyiIzP4MrpB7dgefgRNk3xOH5Dn5Y1Hk0o,1035
- deepeval/prompt/prompt.py,sha256=nTZ5lkjMj4YgtPOL0Tk4uzCGAEpZN7ityOXsrrm5mFI,11110
+ deepeval/prompt/api.py,sha256=q0CU57eGZkCL3CYIFS6HG-JLyptRhqJRLU9Al8roCRk,1705
+ deepeval/prompt/prompt.py,sha256=XVGFOK4eQfjWKm-N1GtRilWqtjLM3JNmIqeuYxQx6Xk,15170
  deepeval/prompt/utils.py,sha256=Gk0zj_9BK8MQccs8GmiC8o-YVtkou6ZJEz8kWgW5Mog,1678
  deepeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  deepeval/red_teaming/README.md,sha256=BY5rAdpp3-sMMToEKwq0Nsd9ivkGDzPE16DeDb8GY7U,154
@@ -421,7 +421,7 @@ deepeval/synthesizer/chunking/context_generator.py,sha256=0c--WxTiGLMF0l5sgjeWQF
  deepeval/synthesizer/chunking/doc_chunker.py,sha256=5PZnxNDuNCngz3wZWG5QeCINec6cIq1ko1bwaDNhxAI,9416
  deepeval/synthesizer/config.py,sha256=vcSi6upnmd667dAGANTTdPmY0z5sQ8Ctal7Xr4-tbhA,1934
  deepeval/synthesizer/schema.py,sha256=PIv3012VMg_v-Ylwn08-4tNjf4QShBSg-kaCkgtdA88,879
- deepeval/synthesizer/synthesizer.py,sha256=SGH--Xd6VRcnI6F2pP4co8F_8r2CvNtgvbOLEKDOZW8,59709
+ deepeval/synthesizer/synthesizer.py,sha256=yyJQgdoDK4-bc92N7fY1-I5DrnUjQEUTQP0UMmwJoJ0,60045
  deepeval/synthesizer/templates/__init__.py,sha256=C-wSGQeMRxTdSBJbgeyAM5Iu6mkHVSYbNfz0AY9K5Yc,209
  deepeval/synthesizer/templates/template.py,sha256=nTH-k8XbvZQD_lagsaf6kmT6oylFjQ7gEseHYB1Zyso,39807
  deepeval/synthesizer/templates/template_extraction.py,sha256=NkpzP-MkoefokVJBZn9s1ErDvI3o9ocY5_ZY85zj4KE,1972
@@ -443,8 +443,8 @@ deepeval/test_run/hooks.py,sha256=Qnd06bk9RJN4WmFUzJrBAi3Xj261hzyzI2iRmG8wbKw,37
  deepeval/test_run/hyperparameters.py,sha256=f7M07w1EfT8YPtiD9xVIVYa3ZewkxewSkK7knwv0YlY,2289
  deepeval/test_run/test_run.py,sha256=eCo_NESZruIAtSu2feSbz9AtOcu9v92TNiS0OON_i-I,33611
  deepeval/tracing/__init__.py,sha256=OPsA_VmYNLC1M-WYJ37R6SxGyLnoXIkuyMBTcAneeao,530
- deepeval/tracing/api.py,sha256=2e40rVCUSODj_M1lGuzmg9SNxucMK4b0G0lqkG5Buyw,4769
- deepeval/tracing/context.py,sha256=oc7QAUVLGTiMw9oYq5lc_5JoKLzmmAkUnvxvCNyVP1A,5242
+ deepeval/tracing/api.py,sha256=rq4rB5f3tfrv6l4mRJmDrwRj5CH4dyatwxhG7p8xbVk,4867
+ deepeval/tracing/context.py,sha256=mA82v7nXVLdM6tQrul8zt7H_sap-8Nfrm2uCpbT5ffM,5337
  deepeval/tracing/offline_evals/__init__.py,sha256=bEniJAl7PmS9u2ksiOTfHtlCPJ9_CJV5R6umrUOX5MM,102
  deepeval/tracing/offline_evals/api.py,sha256=eBfqh2uWyeRkIeGhjrN1bTQzAEow-XPubs-42WEZ2QQ,510
  deepeval/tracing/offline_evals/span.py,sha256=pXqTVXs-WnjRVpCYYEbNe0zSM6Wz9GsKHsM5ZcWxrmM,1802
@@ -455,12 +455,12 @@ deepeval/tracing/otel/exporter.py,sha256=dXQd834zm5rm1ss9pWkBBlk-JSdtiw7aFLso2hM
  deepeval/tracing/otel/utils.py,sha256=g8yAzhqbPh1fOKCWkfNekC6AVotLfu1SUcfNMo6zii8,9786
  deepeval/tracing/patchers.py,sha256=DAPNkhrDtoeyJIVeQDUMhTz-xGcXu00eqjQZmov8FiU,3096
  deepeval/tracing/perf_epoch_bridge.py,sha256=iyAPddB6Op7NpMtPHJ29lDm53Btz9yLaN6xSCfTRQm4,1825
- deepeval/tracing/tracing.py,sha256=StvwFEG3MG67n7PBEyDDycdj0myMbP3LMB_FBhaZH-Y,38741
- deepeval/tracing/types.py,sha256=3w5HEI6y4zuzVr8xGEEzDviLZCX_s_pK85qbwnyf1aY,5196
- deepeval/tracing/utils.py,sha256=eTEickbDvRiOu1twNolh4sHnjZF49vqdLgI74BudeTw,6357
+ deepeval/tracing/tracing.py,sha256=Ot0wzUHxhaK4wZov8cgai-i6kiyZUvNzj9MyRhbjZUg,39191
+ deepeval/tracing/types.py,sha256=l_utWKerNlE5H3mOKpeUJLsvpP3cMyjH7HRANNgTmSQ,5306
+ deepeval/tracing/utils.py,sha256=w_kdhuyBCygllnbqLpDdKJqpJo42t3ZMlGhNicV2A8c,6467
  deepeval/utils.py,sha256=EimWDwI1pKCE8vl6kuTnGbGT6ep9zHL5sZ0o-gj49XI,16857
- deepeval-3.5.0.dist-info/LICENSE.md,sha256=0ATkuLv6QgsJTBODUHC5Rak_PArA6gv2t7inJzNTP38,11352
- deepeval-3.5.0.dist-info/METADATA,sha256=KBAB5m11q4GAhVwCJBmXZDtaYtKoAO3sQ0vg-ajFRLg,18682
- deepeval-3.5.0.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
- deepeval-3.5.0.dist-info/entry_points.txt,sha256=fVr8UphXTfJe9I2rObmUtfU3gkSrYeM0pLy-NbJYg10,94
- deepeval-3.5.0.dist-info/RECORD,,
+ deepeval-3.5.1.dist-info/LICENSE.md,sha256=0ATkuLv6QgsJTBODUHC5Rak_PArA6gv2t7inJzNTP38,11352
+ deepeval-3.5.1.dist-info/METADATA,sha256=KDVwTo18ZlKNfIb_f8oomBUiceMMj7NqvVSKNvN1wbk,18682
+ deepeval-3.5.1.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
+ deepeval-3.5.1.dist-info/entry_points.txt,sha256=fVr8UphXTfJe9I2rObmUtfU3gkSrYeM0pLy-NbJYg10,94
+ deepeval-3.5.1.dist-info/RECORD,,