deepeval 3.5.8__py3-none-any.whl → 3.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/config/settings_manager.py +1 -1
- deepeval/contextvars.py +25 -0
- deepeval/dataset/__init__.py +8 -2
- deepeval/evaluate/execute.py +15 -3
- deepeval/integrations/pydantic_ai/__init__.py +3 -3
- deepeval/integrations/pydantic_ai/agent.py +9 -327
- deepeval/integrations/pydantic_ai/instrumentator.py +196 -0
- deepeval/integrations/pydantic_ai/otel.py +8 -2
- deepeval/openai_agents/__init__.py +4 -3
- deepeval/openai_agents/agent.py +8 -166
- deepeval/openai_agents/callback_handler.py +63 -62
- deepeval/openai_agents/extractors.py +83 -7
- deepeval/openai_agents/patch.py +255 -61
- deepeval/openai_agents/runner.py +348 -335
- deepeval/tracing/context.py +1 -0
- deepeval/tracing/otel/exporter.py +236 -174
- deepeval/tracing/otel/utils.py +95 -7
- deepeval/tracing/tracing.py +3 -0
- deepeval/utils.py +4 -3
- {deepeval-3.5.8.dist-info → deepeval-3.6.0.dist-info}/METADATA +1 -1
- {deepeval-3.5.8.dist-info → deepeval-3.6.0.dist-info}/RECORD +25 -25
- deepeval/integrations/pydantic_ai/patcher.py +0 -484
- deepeval/integrations/pydantic_ai/utils.py +0 -323
- {deepeval-3.5.8.dist-info → deepeval-3.6.0.dist-info}/LICENSE.md +0 -0
- {deepeval-3.5.8.dist-info → deepeval-3.6.0.dist-info}/WHEEL +0 -0
- {deepeval-3.5.8.dist-info → deepeval-3.6.0.dist-info}/entry_points.txt +0 -0
deepeval/tracing/otel/utils.py
CHANGED
|
@@ -99,17 +99,32 @@ def validate_llm_test_case_data(
|
|
|
99
99
|
def check_llm_input_from_gen_ai_attributes(
|
|
100
100
|
span: ReadableSpan,
|
|
101
101
|
) -> Tuple[Optional[list], Optional[dict]]:
|
|
102
|
+
input = None
|
|
103
|
+
output = None
|
|
102
104
|
try:
|
|
103
|
-
input = json.loads(span.attributes.get("
|
|
104
|
-
if input and isinstance(input, list):
|
|
105
|
-
# check if the last event is a genai choice
|
|
106
|
-
last_event = input.pop()
|
|
107
|
-
if last_event and last_event.get("event.name") == "gen_ai.choice":
|
|
108
|
-
return input, last_event
|
|
105
|
+
input = json.loads(span.attributes.get("gen_ai.input.messages"))
|
|
109
106
|
except Exception as e:
|
|
110
107
|
pass
|
|
108
|
+
try:
|
|
109
|
+
output = json.loads(span.attributes.get("gen_ai.output.messages"))
|
|
110
|
+
except Exception as e:
|
|
111
|
+
pass
|
|
112
|
+
|
|
113
|
+
if input is None and output is None:
|
|
114
|
+
try:
|
|
115
|
+
input = json.loads(span.attributes.get("events"))
|
|
116
|
+
if input and isinstance(input, list):
|
|
117
|
+
# check if the last event is a genai choice
|
|
118
|
+
last_event = input.pop()
|
|
119
|
+
if (
|
|
120
|
+
last_event
|
|
121
|
+
and last_event.get("event.name") == "gen_ai.choice"
|
|
122
|
+
):
|
|
123
|
+
output = last_event
|
|
124
|
+
except Exception as e:
|
|
125
|
+
pass
|
|
111
126
|
|
|
112
|
-
return
|
|
127
|
+
return input, output
|
|
113
128
|
|
|
114
129
|
|
|
115
130
|
def check_tool_name_from_gen_ai_attributes(span: ReadableSpan) -> Optional[str]:
|
|
@@ -307,3 +322,76 @@ def post_test_run(traces: List[Trace], test_run_id: Optional[str]):
|
|
|
307
322
|
test_run.add_test_case(case)
|
|
308
323
|
|
|
309
324
|
# return test_run_manager.post_test_run(test_run) TODO: add after test run with metric collection is implemented
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def check_pydantic_ai_agent_input_output(
|
|
328
|
+
span: ReadableSpan,
|
|
329
|
+
) -> Tuple[Optional[Any], Optional[Any]]:
|
|
330
|
+
input_val: Optional[Any] = None
|
|
331
|
+
output_val: Optional[Any] = None
|
|
332
|
+
|
|
333
|
+
# Input (pydantic_ai.all_messages) - slice up to and including the first 'user' message
|
|
334
|
+
try:
|
|
335
|
+
raw = span.attributes.get("pydantic_ai.all_messages")
|
|
336
|
+
if raw:
|
|
337
|
+
messages = raw
|
|
338
|
+
if isinstance(messages, str):
|
|
339
|
+
messages = json.loads(messages)
|
|
340
|
+
elif isinstance(messages, tuple):
|
|
341
|
+
messages = list(messages)
|
|
342
|
+
|
|
343
|
+
if isinstance(messages, list):
|
|
344
|
+
normalized = []
|
|
345
|
+
for m in messages:
|
|
346
|
+
if isinstance(m, str):
|
|
347
|
+
try:
|
|
348
|
+
m = json.loads(m)
|
|
349
|
+
except Exception:
|
|
350
|
+
pass
|
|
351
|
+
normalized.append(m)
|
|
352
|
+
|
|
353
|
+
first_user_idx = None
|
|
354
|
+
for i, m in enumerate(normalized):
|
|
355
|
+
role = None
|
|
356
|
+
if isinstance(m, dict):
|
|
357
|
+
role = m.get("role") or m.get("author")
|
|
358
|
+
if role == "user":
|
|
359
|
+
first_user_idx = i
|
|
360
|
+
break
|
|
361
|
+
|
|
362
|
+
input_val = (
|
|
363
|
+
normalized
|
|
364
|
+
if first_user_idx is None
|
|
365
|
+
else normalized[: first_user_idx + 1]
|
|
366
|
+
)
|
|
367
|
+
except Exception:
|
|
368
|
+
pass
|
|
369
|
+
|
|
370
|
+
# Output (agent final_result)
|
|
371
|
+
try:
|
|
372
|
+
if span.attributes.get("confident.span.type") == "agent":
|
|
373
|
+
output_val = span.attributes.get("final_result")
|
|
374
|
+
except Exception:
|
|
375
|
+
pass
|
|
376
|
+
|
|
377
|
+
return input_val, output_val
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def check_tool_output(span: ReadableSpan):
|
|
381
|
+
try:
|
|
382
|
+
return span.attributes.get("tool_response")
|
|
383
|
+
except Exception as e:
|
|
384
|
+
pass
|
|
385
|
+
return None
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
def check_pydantic_ai_trace_input_output(
|
|
389
|
+
span: ReadableSpan,
|
|
390
|
+
) -> Tuple[Optional[Any], Optional[Any]]:
|
|
391
|
+
input_val: Optional[Any] = None
|
|
392
|
+
output_val: Optional[Any] = None
|
|
393
|
+
|
|
394
|
+
if not span.parent:
|
|
395
|
+
input_val, output_val = check_pydantic_ai_agent_input_output(span)
|
|
396
|
+
|
|
397
|
+
return input_val, output_val
|
deepeval/tracing/tracing.py
CHANGED
|
@@ -796,6 +796,9 @@ class Observer:
|
|
|
796
796
|
# Now create the span instance with the correct trace_uuid and parent_uuid
|
|
797
797
|
span_instance = self.create_span_instance()
|
|
798
798
|
|
|
799
|
+
# stash call arguments so they are available during the span lifetime
|
|
800
|
+
setattr(span_instance, "_function_kwargs", self.function_kwargs)
|
|
801
|
+
|
|
799
802
|
# Add the span to active spans and to its trace
|
|
800
803
|
trace_manager.add_span(span_instance)
|
|
801
804
|
trace_manager.add_span_to_trace(span_instance)
|
deepeval/utils.py
CHANGED
|
@@ -21,12 +21,9 @@ from rich.progress import Progress
|
|
|
21
21
|
from rich.console import Console, Theme
|
|
22
22
|
|
|
23
23
|
from deepeval.confident.api import set_confident_api_key
|
|
24
|
-
from deepeval.constants import CONFIDENT_OPEN_BROWSER
|
|
25
24
|
from deepeval.config.settings import get_settings
|
|
26
25
|
from deepeval.config.utils import (
|
|
27
|
-
parse_bool,
|
|
28
26
|
get_env_bool,
|
|
29
|
-
bool_to_env_str,
|
|
30
27
|
set_env_bool,
|
|
31
28
|
)
|
|
32
29
|
|
|
@@ -418,6 +415,10 @@ def normalize_text(text: str) -> str:
|
|
|
418
415
|
return white_space_fix(remove_articles(remove_punc(lower(text))))
|
|
419
416
|
|
|
420
417
|
|
|
418
|
+
def is_missing(s: Optional[str]) -> bool:
|
|
419
|
+
return s is None or (isinstance(s, str) and s.strip() == "")
|
|
420
|
+
|
|
421
|
+
|
|
421
422
|
###############################################
|
|
422
423
|
# Source: https://github.com/tingofurro/summac
|
|
423
424
|
###############################################
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
deepeval/__init__.py,sha256=6fsb813LD_jNhqR-xZnSdE5E-KsBbC3tc4oIg5ZMgTw,2115
|
|
2
|
-
deepeval/_version.py,sha256=
|
|
2
|
+
deepeval/_version.py,sha256=6gidwc6mZETsJYHWtOUcpPNi74ZeM3A60PXBZMkuLUY,27
|
|
3
3
|
deepeval/annotation/__init__.py,sha256=ZFhUVNNuH_YgQSZJ-m5E9iUb9TkAkEV33a6ouMDZ8EI,111
|
|
4
4
|
deepeval/annotation/annotation.py,sha256=3j3-syeJepAcEj3u3e4T_BeRDzNr7yXGDIoNQGMKpwQ,2298
|
|
5
5
|
deepeval/annotation/api.py,sha256=EYN33ACVzVxsFleRYm60KB4Exvff3rPJKt1VBuuX970,2147
|
|
@@ -142,10 +142,11 @@ deepeval/confident/api.py,sha256=bOC71TaVAEgoXFtJ9yMo0-atmUUdBuvaclMGczMcR6o,845
|
|
|
142
142
|
deepeval/confident/types.py,sha256=-slFhDof_1maMgpLxqDRZv6kz6ZVY2hP_0uj_aveJKU,533
|
|
143
143
|
deepeval/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
144
144
|
deepeval/config/settings.py,sha256=gRRi6nXEUKse13xAShU9MA18zo14vpIgl_R0xJ_0vnM,21314
|
|
145
|
-
deepeval/config/settings_manager.py,sha256=
|
|
145
|
+
deepeval/config/settings_manager.py,sha256=enahSZN8krRu7-L94OBCt99fwUIqQtMRL97PlzsuKEY,4021
|
|
146
146
|
deepeval/config/utils.py,sha256=gSOVv18Tx1R72GucbdQesbZLFL-Y9EzbS4p7qd2w_xE,3799
|
|
147
147
|
deepeval/constants.py,sha256=Qe-es-WDPJndgBspEQXxddDCVanrAu03YWCpXsUkdo0,1368
|
|
148
|
-
deepeval/
|
|
148
|
+
deepeval/contextvars.py,sha256=oqXtuYiKd4Zvc1rNoR1gcRBxzZYCGTMVn7XostwvkRI,524
|
|
149
|
+
deepeval/dataset/__init__.py,sha256=N2c-rkuxWYiiJSOZArw0H02Cwo7cnfzFuNYJlvsIBEg,249
|
|
149
150
|
deepeval/dataset/api.py,sha256=ZxkEqAF4nZH_Ys_1f5r9N2LFI_vBcAJxt8eJm7Mplpw,831
|
|
150
151
|
deepeval/dataset/dataset.py,sha256=dDWTSPWN8i_mZBOAgZt0r5Id6q6aeDf8jAKxv81mP1o,51113
|
|
151
152
|
deepeval/dataset/golden.py,sha256=T-rTk4Hw1tANx_Iimv977F6Y4QK3s5OIB4PecU5FJDM,2338
|
|
@@ -158,7 +159,7 @@ deepeval/evaluate/api.py,sha256=rkblH0ZFAAdyuF0Ymh7JE1pIJPR9yFuPrn9SQaCEQp4,435
|
|
|
158
159
|
deepeval/evaluate/compare.py,sha256=tdSJY4E7YJ_zO3dzvpwngZHLiUI2YQcTWJOLI83htsQ,9855
|
|
159
160
|
deepeval/evaluate/configs.py,sha256=QfWjaWNxLsgEe8-5j4PIs5WcSyEckiWt0qdpXSpl57M,928
|
|
160
161
|
deepeval/evaluate/evaluate.py,sha256=NPAJ2iJqJI_RurXKUIC0tft_ozYMIKwZf5iPfmnNhQc,10412
|
|
161
|
-
deepeval/evaluate/execute.py,sha256=
|
|
162
|
+
deepeval/evaluate/execute.py,sha256=7RCjn2GGcjqK6cp9-0BtHL6PPJNw5-KXqXL60GN3G5Y,88672
|
|
162
163
|
deepeval/evaluate/types.py,sha256=IGZ3Xsj0UecPI3JNeTpJaK1gDvlepokfCmHwtItIW9M,831
|
|
163
164
|
deepeval/evaluate/utils.py,sha256=kkliSGzuICeUsXDtlMMPfN95dUKlqarNhfciSffd4gI,23143
|
|
164
165
|
deepeval/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -179,11 +180,10 @@ deepeval/integrations/llama_index/__init__.py,sha256=zBwUFQXDp6QFtp1cfANy8ucV08r
|
|
|
179
180
|
deepeval/integrations/llama_index/agent/patched.py,sha256=4JbH0WQmt4lct7xxIH0phj8_Y-V35dgVv7DEDXK0jZI,2149
|
|
180
181
|
deepeval/integrations/llama_index/handler.py,sha256=eqI1n8E4MsvfKoFs5Zrm9IdCR7g9eBgNedISs7UkU_I,8947
|
|
181
182
|
deepeval/integrations/llama_index/utils.py,sha256=mxW71-3PjvBvJpLIU0kNWuTzCidy5l_-roLt8ZyWYA0,2599
|
|
182
|
-
deepeval/integrations/pydantic_ai/__init__.py,sha256=
|
|
183
|
-
deepeval/integrations/pydantic_ai/agent.py,sha256=
|
|
184
|
-
deepeval/integrations/pydantic_ai/
|
|
185
|
-
deepeval/integrations/pydantic_ai/
|
|
186
|
-
deepeval/integrations/pydantic_ai/utils.py,sha256=734e9un-fn5V7MueAmVsXh304qgumv_fdcmdOC4HrJw,10998
|
|
183
|
+
deepeval/integrations/pydantic_ai/__init__.py,sha256=UIkXn_g6h9LTQXG1PaWu1eCFkCssIwG48WSvN46UWgU,202
|
|
184
|
+
deepeval/integrations/pydantic_ai/agent.py,sha256=4wRV25O1tC-txH2j3TNJWry6gDNBqqThj7zgFKBxJpw,606
|
|
185
|
+
deepeval/integrations/pydantic_ai/instrumentator.py,sha256=tGHuP7nn4jL9bUFR2fZWbF4k_EhF5JXOnCwQm-tmDKc,6974
|
|
186
|
+
deepeval/integrations/pydantic_ai/otel.py,sha256=0OuIpmaMtEt1dFWFZtYAiZ9hVCWweEWr1TRHYcDb4I8,1918
|
|
187
187
|
deepeval/key_handler.py,sha256=damdQEBLGy4IVk5DR5-E3blIZdLbcMtyeGAFn_4_SG4,6505
|
|
188
188
|
deepeval/metrics/__init__.py,sha256=nvO0Wv2JROjK1I9MDNIFUJlrRAZI2C0xbGYSBZK5q4g,4013
|
|
189
189
|
deepeval/metrics/answer_relevancy/__init__.py,sha256=WbZUpoSg2GQoqJ4VIRirVVQ1JDx5xwT-RskwqNKfWGM,46
|
|
@@ -394,12 +394,12 @@ deepeval/openai/__init__.py,sha256=g6ErXRZIJ08XkWJQPdnwWKk_dSOi26ucY23gx62c6OA,1
|
|
|
394
394
|
deepeval/openai/extractors.py,sha256=q062nlYKuPVwqfLFYCD1yWv7xHF1U_XrYdAp5ve2l_E,4942
|
|
395
395
|
deepeval/openai/patch.py,sha256=tPDqXaBScBJveM9P5xLT_mVwkubw0bOey-efvdjZIfg,7466
|
|
396
396
|
deepeval/openai/utils.py,sha256=-84VZGUsnzRkYAFWc_DGaGuQTDCUItk0VtUTdjtSxg4,2748
|
|
397
|
-
deepeval/openai_agents/__init__.py,sha256=
|
|
398
|
-
deepeval/openai_agents/agent.py,sha256=
|
|
399
|
-
deepeval/openai_agents/callback_handler.py,sha256=
|
|
400
|
-
deepeval/openai_agents/extractors.py,sha256=
|
|
401
|
-
deepeval/openai_agents/patch.py,sha256=
|
|
402
|
-
deepeval/openai_agents/runner.py,sha256=
|
|
397
|
+
deepeval/openai_agents/__init__.py,sha256=F4c6MtsdV7LWj0YamQcMGs4_u5sOYZJXWOQP8kV5xUg,314
|
|
398
|
+
deepeval/openai_agents/agent.py,sha256=_SQdd0JzZK-ZvpP7yPEi22Y7fVk16PC00ROahdDQdCQ,951
|
|
399
|
+
deepeval/openai_agents/callback_handler.py,sha256=HSMVKgkbFz6NWOsij0bHlznoDs0CbYYvClc_8345PIc,4637
|
|
400
|
+
deepeval/openai_agents/extractors.py,sha256=jcV-IeWLIh64astJRy_dRBAbUOIab1vp0Wzda7AgVyk,13963
|
|
401
|
+
deepeval/openai_agents/patch.py,sha256=MNvbGe5NLq0rC7L-7lnqcxKhclQvLuBKZnZyAifSHLY,10241
|
|
402
|
+
deepeval/openai_agents/runner.py,sha256=WtHuzhYHgC571uJYGjbTz3R23VaKnlKybGJSRCxM9pY,12310
|
|
403
403
|
deepeval/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
404
404
|
deepeval/plugins/plugin.py,sha256=_dwsdx4Dg9DbXxK3f7zJY4QWTJQWc7QE1HmIg2Zjjag,1515
|
|
405
405
|
deepeval/progress_context.py,sha256=ZSKpxrE9sdgt9G3REKnVeXAv7GJXHHVGgLynpG1Pudw,3557
|
|
@@ -446,23 +446,23 @@ deepeval/test_run/hyperparameters.py,sha256=f7M07w1EfT8YPtiD9xVIVYa3ZewkxewSkK7k
|
|
|
446
446
|
deepeval/test_run/test_run.py,sha256=eCo_NESZruIAtSu2feSbz9AtOcu9v92TNiS0OON_i-I,33611
|
|
447
447
|
deepeval/tracing/__init__.py,sha256=OPsA_VmYNLC1M-WYJ37R6SxGyLnoXIkuyMBTcAneeao,530
|
|
448
448
|
deepeval/tracing/api.py,sha256=rq4rB5f3tfrv6l4mRJmDrwRj5CH4dyatwxhG7p8xbVk,4867
|
|
449
|
-
deepeval/tracing/context.py,sha256=
|
|
449
|
+
deepeval/tracing/context.py,sha256=rzm42zYzP7jmQJO08AV-Qmw86ik45qRfF4UQNpGcmJw,5338
|
|
450
450
|
deepeval/tracing/offline_evals/__init__.py,sha256=bEniJAl7PmS9u2ksiOTfHtlCPJ9_CJV5R6umrUOX5MM,102
|
|
451
451
|
deepeval/tracing/offline_evals/api.py,sha256=eBfqh2uWyeRkIeGhjrN1bTQzAEow-XPubs-42WEZ2QQ,510
|
|
452
452
|
deepeval/tracing/offline_evals/span.py,sha256=pXqTVXs-WnjRVpCYYEbNe0zSM6Wz9GsKHsM5ZcWxrmM,1802
|
|
453
453
|
deepeval/tracing/offline_evals/thread.py,sha256=bcSGFcZJKnszArOLIlWvnCyt0zSmsd7Xsw5rl4RTVFg,1981
|
|
454
454
|
deepeval/tracing/offline_evals/trace.py,sha256=vTflaTKysKRiYvKA-Nx6PUJ3J6NrRLXiIdWieVcm90E,1868
|
|
455
455
|
deepeval/tracing/otel/__init__.py,sha256=HQsaF5yLPwyW5qg8AOV81_nG_7pFHnatOTHi9Wx3HEk,88
|
|
456
|
-
deepeval/tracing/otel/exporter.py,sha256=
|
|
457
|
-
deepeval/tracing/otel/utils.py,sha256=
|
|
456
|
+
deepeval/tracing/otel/exporter.py,sha256=YfzV2zyxRwD6FO-qV0PR-y1nk1kInPJp0TtIuWNHbh0,28606
|
|
457
|
+
deepeval/tracing/otel/utils.py,sha256=xydJyUgR1s4hTGcz-onvVHyiT7F6lUXRuh_nprHkKbU,12396
|
|
458
458
|
deepeval/tracing/patchers.py,sha256=DAPNkhrDtoeyJIVeQDUMhTz-xGcXu00eqjQZmov8FiU,3096
|
|
459
459
|
deepeval/tracing/perf_epoch_bridge.py,sha256=iyAPddB6Op7NpMtPHJ29lDm53Btz9yLaN6xSCfTRQm4,1825
|
|
460
|
-
deepeval/tracing/tracing.py,sha256=
|
|
460
|
+
deepeval/tracing/tracing.py,sha256=WFXfGLt58Ia9yCohDZBIUGX6mwieoF8489UziuC-NJI,42458
|
|
461
461
|
deepeval/tracing/types.py,sha256=l_utWKerNlE5H3mOKpeUJLsvpP3cMyjH7HRANNgTmSQ,5306
|
|
462
462
|
deepeval/tracing/utils.py,sha256=w_kdhuyBCygllnbqLpDdKJqpJo42t3ZMlGhNicV2A8c,6467
|
|
463
|
-
deepeval/utils.py,sha256
|
|
464
|
-
deepeval-3.
|
|
465
|
-
deepeval-3.
|
|
466
|
-
deepeval-3.
|
|
467
|
-
deepeval-3.
|
|
468
|
-
deepeval-3.
|
|
463
|
+
deepeval/utils.py,sha256=-_o3W892u7naX4Y7a8if4mP0Rtkgtapg6Krr1ZBpj0o,17197
|
|
464
|
+
deepeval-3.6.0.dist-info/LICENSE.md,sha256=0ATkuLv6QgsJTBODUHC5Rak_PArA6gv2t7inJzNTP38,11352
|
|
465
|
+
deepeval-3.6.0.dist-info/METADATA,sha256=XHRGHFIWxy8_kHr2iCFDXk-tX-ubqjwXtqofMTSVZiQ,18743
|
|
466
|
+
deepeval-3.6.0.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
|
|
467
|
+
deepeval-3.6.0.dist-info/entry_points.txt,sha256=fVr8UphXTfJe9I2rObmUtfU3gkSrYeM0pLy-NbJYg10,94
|
|
468
|
+
deepeval-3.6.0.dist-info/RECORD,,
|