PyPI - deepeval - Versions diffs - 3.5.8__py3-none-any.whl → 3.6.0__py3-none-any.whl - Mend

deepeval 3.5.8__py3-none-any.whl → 3.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

deepeval/_version.py +1 -1
deepeval/config/settings_manager.py +1 -1
deepeval/contextvars.py +25 -0
deepeval/dataset/__init__.py +8 -2
deepeval/evaluate/execute.py +15 -3
deepeval/integrations/pydantic_ai/__init__.py +3 -3
deepeval/integrations/pydantic_ai/agent.py +9 -327
deepeval/integrations/pydantic_ai/instrumentator.py +196 -0
deepeval/integrations/pydantic_ai/otel.py +8 -2
deepeval/openai_agents/__init__.py +4 -3
deepeval/openai_agents/agent.py +8 -166
deepeval/openai_agents/callback_handler.py +63 -62
deepeval/openai_agents/extractors.py +83 -7
deepeval/openai_agents/patch.py +255 -61
deepeval/openai_agents/runner.py +348 -335
deepeval/tracing/context.py +1 -0
deepeval/tracing/otel/exporter.py +236 -174
deepeval/tracing/otel/utils.py +95 -7
deepeval/tracing/tracing.py +3 -0
deepeval/utils.py +4 -3
{deepeval-3.5.8.dist-info → deepeval-3.6.0.dist-info}/METADATA +1 -1
{deepeval-3.5.8.dist-info → deepeval-3.6.0.dist-info}/RECORD +25 -25
deepeval/integrations/pydantic_ai/patcher.py +0 -484
deepeval/integrations/pydantic_ai/utils.py +0 -323
{deepeval-3.5.8.dist-info → deepeval-3.6.0.dist-info}/LICENSE.md +0 -0
{deepeval-3.5.8.dist-info → deepeval-3.6.0.dist-info}/WHEEL +0 -0
{deepeval-3.5.8.dist-info → deepeval-3.6.0.dist-info}/entry_points.txt +0 -0

deepeval/tracing/otel/utils.py CHANGED Viewed

@@ -99,17 +99,32 @@ def validate_llm_test_case_data(
 def check_llm_input_from_gen_ai_attributes(
     span: ReadableSpan,
 ) -> Tuple[Optional[list], Optional[dict]]:
+    input = None
+    output = None
     try:
-        input = json.loads(span.attributes.get("events"))
-        if input and isinstance(input, list):
-            # check if the last event is a genai choice
-            last_event = input.pop()
-            if last_event and last_event.get("event.name") == "gen_ai.choice":
-                return input, last_event
+        input = json.loads(span.attributes.get("gen_ai.input.messages"))
     except Exception as e:
         pass
+    try:
+        output = json.loads(span.attributes.get("gen_ai.output.messages"))
+    except Exception as e:
+        pass
+    if input is None and output is None:
+        try:
+            input = json.loads(span.attributes.get("events"))
+            if input and isinstance(input, list):
+                # check if the last event is a genai choice
+                last_event = input.pop()
+                if (
+                    last_event
+                    and last_event.get("event.name") == "gen_ai.choice"
+                ):
+                    output = last_event
+        except Exception as e:
+            pass
-    return None, None
+    return input, output
 def check_tool_name_from_gen_ai_attributes(span: ReadableSpan) -> Optional[str]:
@@ -307,3 +322,76 @@ def post_test_run(traces: List[Trace], test_run_id: Optional[str]):
         test_run.add_test_case(case)
     # return test_run_manager.post_test_run(test_run) TODO: add after test run with metric collection is implemented
+def check_pydantic_ai_agent_input_output(
+    span: ReadableSpan,
+) -> Tuple[Optional[Any], Optional[Any]]:
+    input_val: Optional[Any] = None
+    output_val: Optional[Any] = None
+    # Input (pydantic_ai.all_messages) - slice up to and including the first 'user' message
+    try:
+        raw = span.attributes.get("pydantic_ai.all_messages")
+        if raw:
+            messages = raw
+            if isinstance(messages, str):
+                messages = json.loads(messages)
+            elif isinstance(messages, tuple):
+                messages = list(messages)
+            if isinstance(messages, list):
+                normalized = []
+                for m in messages:
+                    if isinstance(m, str):
+                        try:
+                            m = json.loads(m)
+                        except Exception:
+                            pass
+                    normalized.append(m)
+                first_user_idx = None
+                for i, m in enumerate(normalized):
+                    role = None
+                    if isinstance(m, dict):
+                        role = m.get("role") or m.get("author")
+                    if role == "user":
+                        first_user_idx = i
+                        break
+                input_val = (
+                    normalized
+                    if first_user_idx is None
+                    else normalized[: first_user_idx + 1]
+                )
+    except Exception:
+        pass
+    # Output (agent final_result)
+    try:
+        if span.attributes.get("confident.span.type") == "agent":
+            output_val = span.attributes.get("final_result")
+    except Exception:
+        pass
+    return input_val, output_val
+def check_tool_output(span: ReadableSpan):
+    try:
+        return span.attributes.get("tool_response")
+    except Exception as e:
+        pass
+    return None
+def check_pydantic_ai_trace_input_output(
+    span: ReadableSpan,
+) -> Tuple[Optional[Any], Optional[Any]]:
+    input_val: Optional[Any] = None
+    output_val: Optional[Any] = None
+    if not span.parent:
+        input_val, output_val = check_pydantic_ai_agent_input_output(span)
+    return input_val, output_val

deepeval/tracing/tracing.py CHANGED Viewed

@@ -796,6 +796,9 @@ class Observer:
         # Now create the span instance with the correct trace_uuid and parent_uuid
         span_instance = self.create_span_instance()
+        # stash call arguments so they are available during the span lifetime
+        setattr(span_instance, "_function_kwargs", self.function_kwargs)
         # Add the span to active spans and to its trace
         trace_manager.add_span(span_instance)
         trace_manager.add_span_to_trace(span_instance)

deepeval/utils.py CHANGED Viewed

@@ -21,12 +21,9 @@ from rich.progress import Progress
 from rich.console import Console, Theme
 from deepeval.confident.api import set_confident_api_key
-from deepeval.constants import CONFIDENT_OPEN_BROWSER
 from deepeval.config.settings import get_settings
 from deepeval.config.utils import (
-    parse_bool,
     get_env_bool,
-    bool_to_env_str,
     set_env_bool,
 )
@@ -418,6 +415,10 @@ def normalize_text(text: str) -> str:
     return white_space_fix(remove_articles(remove_punc(lower(text))))
+def is_missing(s: Optional[str]) -> bool:
+    return s is None or (isinstance(s, str) and s.strip() == "")
 ###############################################
 # Source: https://github.com/tingofurro/summac
 ###############################################

{deepeval-3.5.8.dist-info → deepeval-3.6.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: deepeval
-Version: 3.5.8
+Version: 3.6.0
 Summary: The LLM Evaluation Framework
 Home-page: https://github.com/confident-ai/deepeval
 License: Apache-2.0

{deepeval-3.5.8.dist-info → deepeval-3.6.0.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 deepeval/__init__.py,sha256=6fsb813LD_jNhqR-xZnSdE5E-KsBbC3tc4oIg5ZMgTw,2115
-deepeval/_version.py,sha256=unWkmwnFycd1EkbcelGqbdnCdsoFCoHp1cgSea6zrS0,27
+deepeval/_version.py,sha256=6gidwc6mZETsJYHWtOUcpPNi74ZeM3A60PXBZMkuLUY,27
 deepeval/annotation/__init__.py,sha256=ZFhUVNNuH_YgQSZJ-m5E9iUb9TkAkEV33a6ouMDZ8EI,111
 deepeval/annotation/annotation.py,sha256=3j3-syeJepAcEj3u3e4T_BeRDzNr7yXGDIoNQGMKpwQ,2298
 deepeval/annotation/api.py,sha256=EYN33ACVzVxsFleRYm60KB4Exvff3rPJKt1VBuuX970,2147
@@ -142,10 +142,11 @@ deepeval/confident/api.py,sha256=bOC71TaVAEgoXFtJ9yMo0-atmUUdBuvaclMGczMcR6o,845
 deepeval/confident/types.py,sha256=-slFhDof_1maMgpLxqDRZv6kz6ZVY2hP_0uj_aveJKU,533
 deepeval/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deepeval/config/settings.py,sha256=gRRi6nXEUKse13xAShU9MA18zo14vpIgl_R0xJ_0vnM,21314
-deepeval/config/settings_manager.py,sha256=PsBS_5dRJASak2AUDwjhjLSiezNz1fje0R3onoFCKC0,4014
+deepeval/config/settings_manager.py,sha256=enahSZN8krRu7-L94OBCt99fwUIqQtMRL97PlzsuKEY,4021
 deepeval/config/utils.py,sha256=gSOVv18Tx1R72GucbdQesbZLFL-Y9EzbS4p7qd2w_xE,3799
 deepeval/constants.py,sha256=Qe-es-WDPJndgBspEQXxddDCVanrAu03YWCpXsUkdo0,1368
-deepeval/dataset/__init__.py,sha256=rcum_VjBXu8eisCdr6sl84BgoZUs3x0tYbB2PnPtHGY,212
+deepeval/contextvars.py,sha256=oqXtuYiKd4Zvc1rNoR1gcRBxzZYCGTMVn7XostwvkRI,524
+deepeval/dataset/__init__.py,sha256=N2c-rkuxWYiiJSOZArw0H02Cwo7cnfzFuNYJlvsIBEg,249
 deepeval/dataset/api.py,sha256=ZxkEqAF4nZH_Ys_1f5r9N2LFI_vBcAJxt8eJm7Mplpw,831
 deepeval/dataset/dataset.py,sha256=dDWTSPWN8i_mZBOAgZt0r5Id6q6aeDf8jAKxv81mP1o,51113
 deepeval/dataset/golden.py,sha256=T-rTk4Hw1tANx_Iimv977F6Y4QK3s5OIB4PecU5FJDM,2338
@@ -158,7 +159,7 @@ deepeval/evaluate/api.py,sha256=rkblH0ZFAAdyuF0Ymh7JE1pIJPR9yFuPrn9SQaCEQp4,435
 deepeval/evaluate/compare.py,sha256=tdSJY4E7YJ_zO3dzvpwngZHLiUI2YQcTWJOLI83htsQ,9855
 deepeval/evaluate/configs.py,sha256=QfWjaWNxLsgEe8-5j4PIs5WcSyEckiWt0qdpXSpl57M,928
 deepeval/evaluate/evaluate.py,sha256=NPAJ2iJqJI_RurXKUIC0tft_ozYMIKwZf5iPfmnNhQc,10412
-deepeval/evaluate/execute.py,sha256=fJLBl45Vf4rA4Pm7k932TG-0BNIvf90klQyurXb-b_4,88057
+deepeval/evaluate/execute.py,sha256=7RCjn2GGcjqK6cp9-0BtHL6PPJNw5-KXqXL60GN3G5Y,88672
 deepeval/evaluate/types.py,sha256=IGZ3Xsj0UecPI3JNeTpJaK1gDvlepokfCmHwtItIW9M,831
 deepeval/evaluate/utils.py,sha256=kkliSGzuICeUsXDtlMMPfN95dUKlqarNhfciSffd4gI,23143
 deepeval/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -179,11 +180,10 @@ deepeval/integrations/llama_index/__init__.py,sha256=zBwUFQXDp6QFtp1cfANy8ucV08r
 deepeval/integrations/llama_index/agent/patched.py,sha256=4JbH0WQmt4lct7xxIH0phj8_Y-V35dgVv7DEDXK0jZI,2149
 deepeval/integrations/llama_index/handler.py,sha256=eqI1n8E4MsvfKoFs5Zrm9IdCR7g9eBgNedISs7UkU_I,8947
 deepeval/integrations/llama_index/utils.py,sha256=mxW71-3PjvBvJpLIU0kNWuTzCidy5l_-roLt8ZyWYA0,2599
-deepeval/integrations/pydantic_ai/__init__.py,sha256=0-GZpWgCnFI-fVHI-3DosWQK85rk6CoRRhl4AiytBAw,258
-deepeval/integrations/pydantic_ai/agent.py,sha256=HxfeTLsdWGgRMy00ymgXdE6dcFDmFBsdgfl9BbvyJns,12311
-deepeval/integrations/pydantic_ai/otel.py,sha256=2DpO3RapdztXPlT9BWhQfF4dJDMyp2X7YvuplJ0SwC8,1661
-deepeval/integrations/pydantic_ai/patcher.py,sha256=yy4SZRmRhgYxh6qGVWWf8DnSMCDA9GLkFw1HbPToQ1w,17696
-deepeval/integrations/pydantic_ai/utils.py,sha256=734e9un-fn5V7MueAmVsXh304qgumv_fdcmdOC4HrJw,10998
+deepeval/integrations/pydantic_ai/__init__.py,sha256=UIkXn_g6h9LTQXG1PaWu1eCFkCssIwG48WSvN46UWgU,202
+deepeval/integrations/pydantic_ai/agent.py,sha256=4wRV25O1tC-txH2j3TNJWry6gDNBqqThj7zgFKBxJpw,606
+deepeval/integrations/pydantic_ai/instrumentator.py,sha256=tGHuP7nn4jL9bUFR2fZWbF4k_EhF5JXOnCwQm-tmDKc,6974
+deepeval/integrations/pydantic_ai/otel.py,sha256=0OuIpmaMtEt1dFWFZtYAiZ9hVCWweEWr1TRHYcDb4I8,1918
 deepeval/key_handler.py,sha256=damdQEBLGy4IVk5DR5-E3blIZdLbcMtyeGAFn_4_SG4,6505
 deepeval/metrics/__init__.py,sha256=nvO0Wv2JROjK1I9MDNIFUJlrRAZI2C0xbGYSBZK5q4g,4013
 deepeval/metrics/answer_relevancy/__init__.py,sha256=WbZUpoSg2GQoqJ4VIRirVVQ1JDx5xwT-RskwqNKfWGM,46
@@ -394,12 +394,12 @@ deepeval/openai/__init__.py,sha256=g6ErXRZIJ08XkWJQPdnwWKk_dSOi26ucY23gx62c6OA,1
 deepeval/openai/extractors.py,sha256=q062nlYKuPVwqfLFYCD1yWv7xHF1U_XrYdAp5ve2l_E,4942
 deepeval/openai/patch.py,sha256=tPDqXaBScBJveM9P5xLT_mVwkubw0bOey-efvdjZIfg,7466
 deepeval/openai/utils.py,sha256=-84VZGUsnzRkYAFWc_DGaGuQTDCUItk0VtUTdjtSxg4,2748
-deepeval/openai_agents/__init__.py,sha256=u-e9laod3LyPfLcI5lr7Yhk8ArfWvlpr-D4_idWIt0A,321
-deepeval/openai_agents/agent.py,sha256=gZcmfqTgrQaJV8g6ChmmdpyArEp6oDIqHSaYPDEd344,6100
-deepeval/openai_agents/callback_handler.py,sha256=jrV2Uv9FjfU1BQQe6V_ltT3QS8ZcalxMbqzJI2vvJXo,4713
-deepeval/openai_agents/extractors.py,sha256=0jZxwgY1NQ3mMxVWPpLcMpKlbj-aYV7rwuzRzG8hdZs,11529
-deepeval/openai_agents/patch.py,sha256=zSmRV5yOReHC6IylhT93SM1nQpmH3sEWfYcJqa_iM84,3684
-deepeval/openai_agents/runner.py,sha256=U8Kh4jHhDIYVkIIxytcGCKRFHdgxxhpATHd9jnbh1Eg,10999
+deepeval/openai_agents/__init__.py,sha256=F4c6MtsdV7LWj0YamQcMGs4_u5sOYZJXWOQP8kV5xUg,314
+deepeval/openai_agents/agent.py,sha256=_SQdd0JzZK-ZvpP7yPEi22Y7fVk16PC00ROahdDQdCQ,951
+deepeval/openai_agents/callback_handler.py,sha256=HSMVKgkbFz6NWOsij0bHlznoDs0CbYYvClc_8345PIc,4637
+deepeval/openai_agents/extractors.py,sha256=jcV-IeWLIh64astJRy_dRBAbUOIab1vp0Wzda7AgVyk,13963
+deepeval/openai_agents/patch.py,sha256=MNvbGe5NLq0rC7L-7lnqcxKhclQvLuBKZnZyAifSHLY,10241
+deepeval/openai_agents/runner.py,sha256=WtHuzhYHgC571uJYGjbTz3R23VaKnlKybGJSRCxM9pY,12310
 deepeval/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deepeval/plugins/plugin.py,sha256=_dwsdx4Dg9DbXxK3f7zJY4QWTJQWc7QE1HmIg2Zjjag,1515
 deepeval/progress_context.py,sha256=ZSKpxrE9sdgt9G3REKnVeXAv7GJXHHVGgLynpG1Pudw,3557
@@ -446,23 +446,23 @@ deepeval/test_run/hyperparameters.py,sha256=f7M07w1EfT8YPtiD9xVIVYa3ZewkxewSkK7k
 deepeval/test_run/test_run.py,sha256=eCo_NESZruIAtSu2feSbz9AtOcu9v92TNiS0OON_i-I,33611
 deepeval/tracing/__init__.py,sha256=OPsA_VmYNLC1M-WYJ37R6SxGyLnoXIkuyMBTcAneeao,530
 deepeval/tracing/api.py,sha256=rq4rB5f3tfrv6l4mRJmDrwRj5CH4dyatwxhG7p8xbVk,4867
-deepeval/tracing/context.py,sha256=mA82v7nXVLdM6tQrul8zt7H_sap-8Nfrm2uCpbT5ffM,5337
+deepeval/tracing/context.py,sha256=rzm42zYzP7jmQJO08AV-Qmw86ik45qRfF4UQNpGcmJw,5338
 deepeval/tracing/offline_evals/__init__.py,sha256=bEniJAl7PmS9u2ksiOTfHtlCPJ9_CJV5R6umrUOX5MM,102
 deepeval/tracing/offline_evals/api.py,sha256=eBfqh2uWyeRkIeGhjrN1bTQzAEow-XPubs-42WEZ2QQ,510
 deepeval/tracing/offline_evals/span.py,sha256=pXqTVXs-WnjRVpCYYEbNe0zSM6Wz9GsKHsM5ZcWxrmM,1802
 deepeval/tracing/offline_evals/thread.py,sha256=bcSGFcZJKnszArOLIlWvnCyt0zSmsd7Xsw5rl4RTVFg,1981
 deepeval/tracing/offline_evals/trace.py,sha256=vTflaTKysKRiYvKA-Nx6PUJ3J6NrRLXiIdWieVcm90E,1868
 deepeval/tracing/otel/__init__.py,sha256=HQsaF5yLPwyW5qg8AOV81_nG_7pFHnatOTHi9Wx3HEk,88
-deepeval/tracing/otel/exporter.py,sha256=dXQd834zm5rm1ss9pWkBBlk-JSdtiw7aFLso2hM53XY,26372
-deepeval/tracing/otel/utils.py,sha256=g8yAzhqbPh1fOKCWkfNekC6AVotLfu1SUcfNMo6zii8,9786
+deepeval/tracing/otel/exporter.py,sha256=YfzV2zyxRwD6FO-qV0PR-y1nk1kInPJp0TtIuWNHbh0,28606
+deepeval/tracing/otel/utils.py,sha256=xydJyUgR1s4hTGcz-onvVHyiT7F6lUXRuh_nprHkKbU,12396
 deepeval/tracing/patchers.py,sha256=DAPNkhrDtoeyJIVeQDUMhTz-xGcXu00eqjQZmov8FiU,3096
 deepeval/tracing/perf_epoch_bridge.py,sha256=iyAPddB6Op7NpMtPHJ29lDm53Btz9yLaN6xSCfTRQm4,1825
-deepeval/tracing/tracing.py,sha256=b-0T3W6lAEOEGhODx0e-yIwBkm5V46EDNAWS9lcWkD0,42306
+deepeval/tracing/tracing.py,sha256=WFXfGLt58Ia9yCohDZBIUGX6mwieoF8489UziuC-NJI,42458
 deepeval/tracing/types.py,sha256=l_utWKerNlE5H3mOKpeUJLsvpP3cMyjH7HRANNgTmSQ,5306
 deepeval/tracing/utils.py,sha256=w_kdhuyBCygllnbqLpDdKJqpJo42t3ZMlGhNicV2A8c,6467
-deepeval/utils.py,sha256=r8tV_NYJSi6ib-oQw6cLw3L7ZSe4KIJVJc1ng6-kDX4,17179
-deepeval-3.5.8.dist-info/LICENSE.md,sha256=0ATkuLv6QgsJTBODUHC5Rak_PArA6gv2t7inJzNTP38,11352
-deepeval-3.5.8.dist-info/METADATA,sha256=7yiM7djTQ2fLy8XfdyecBxMg3cgk3hDAsGLUjRamC44,18743
-deepeval-3.5.8.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
-deepeval-3.5.8.dist-info/entry_points.txt,sha256=fVr8UphXTfJe9I2rObmUtfU3gkSrYeM0pLy-NbJYg10,94
-deepeval-3.5.8.dist-info/RECORD,,
+deepeval/utils.py,sha256=-_o3W892u7naX4Y7a8if4mP0Rtkgtapg6Krr1ZBpj0o,17197
+deepeval-3.6.0.dist-info/LICENSE.md,sha256=0ATkuLv6QgsJTBODUHC5Rak_PArA6gv2t7inJzNTP38,11352
+deepeval-3.6.0.dist-info/METADATA,sha256=XHRGHFIWxy8_kHr2iCFDXk-tX-ubqjwXtqofMTSVZiQ,18743
+deepeval-3.6.0.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
+deepeval-3.6.0.dist-info/entry_points.txt,sha256=fVr8UphXTfJe9I2rObmUtfU3gkSrYeM0pLy-NbJYg10,94
+deepeval-3.6.0.dist-info/RECORD,,

deepeval 3.5.8__py3-none-any.whl → 3.6.0__py3-none-any.whl

deepeval 3.5.8__py3-none-any.whl → 3.6.0__py3-none-any.whl