deepeval 3.4.8__py3-none-any.whl → 3.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. deepeval/__init__.py +8 -5
  2. deepeval/_version.py +1 -1
  3. deepeval/benchmarks/drop/drop.py +2 -3
  4. deepeval/benchmarks/hellaswag/hellaswag.py +2 -2
  5. deepeval/benchmarks/logi_qa/logi_qa.py +2 -2
  6. deepeval/benchmarks/math_qa/math_qa.py +2 -2
  7. deepeval/benchmarks/mmlu/mmlu.py +2 -2
  8. deepeval/benchmarks/truthful_qa/truthful_qa.py +2 -2
  9. deepeval/cli/main.py +561 -727
  10. deepeval/confident/api.py +30 -14
  11. deepeval/config/__init__.py +0 -0
  12. deepeval/config/settings.py +565 -0
  13. deepeval/config/settings_manager.py +133 -0
  14. deepeval/config/utils.py +86 -0
  15. deepeval/dataset/__init__.py +1 -0
  16. deepeval/dataset/dataset.py +70 -10
  17. deepeval/dataset/test_run_tracer.py +82 -0
  18. deepeval/dataset/utils.py +23 -0
  19. deepeval/integrations/pydantic_ai/__init__.py +2 -4
  20. deepeval/integrations/pydantic_ai/{setup.py → otel.py} +0 -8
  21. deepeval/integrations/pydantic_ai/patcher.py +376 -0
  22. deepeval/key_handler.py +1 -0
  23. deepeval/metrics/answer_relevancy/template.py +7 -2
  24. deepeval/metrics/faithfulness/template.py +11 -8
  25. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +6 -4
  26. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +6 -4
  27. deepeval/metrics/tool_correctness/tool_correctness.py +7 -3
  28. deepeval/models/llms/amazon_bedrock_model.py +24 -3
  29. deepeval/models/llms/grok_model.py +1 -1
  30. deepeval/models/llms/kimi_model.py +1 -1
  31. deepeval/models/llms/openai_model.py +37 -41
  32. deepeval/models/retry_policy.py +280 -0
  33. deepeval/openai_agents/agent.py +4 -2
  34. deepeval/test_run/api.py +1 -0
  35. deepeval/tracing/otel/exporter.py +20 -8
  36. deepeval/tracing/otel/utils.py +57 -0
  37. deepeval/tracing/perf_epoch_bridge.py +4 -4
  38. deepeval/tracing/tracing.py +37 -16
  39. deepeval/tracing/utils.py +98 -1
  40. deepeval/utils.py +111 -70
  41. {deepeval-3.4.8.dist-info → deepeval-3.5.0.dist-info}/METADATA +16 -13
  42. {deepeval-3.4.8.dist-info → deepeval-3.5.0.dist-info}/RECORD +45 -40
  43. deepeval/env.py +0 -35
  44. deepeval/integrations/pydantic_ai/agent.py +0 -364
  45. {deepeval-3.4.8.dist-info → deepeval-3.5.0.dist-info}/LICENSE.md +0 -0
  46. {deepeval-3.4.8.dist-info → deepeval-3.5.0.dist-info}/WHEEL +0 -0
  47. {deepeval-3.4.8.dist-info → deepeval-3.5.0.dist-info}/entry_points.txt +0 -0
@@ -1,5 +1,5 @@
1
- deepeval/__init__.py,sha256=AlJuWqCTX3AzXZlpR1uR5V_pB3jzJLfz09KX4oezYjA,2061
2
- deepeval/_version.py,sha256=z7dA6QBDWH8g5G_H07ecMtO-L3XjcOGd19g1PXeMXss,27
1
+ deepeval/__init__.py,sha256=6fsb813LD_jNhqR-xZnSdE5E-KsBbC3tc4oIg5ZMgTw,2115
2
+ deepeval/_version.py,sha256=xgoMNdDXsY3c4GfV1_DVK-xGdMOp5KCDaKln5j0PJdY,27
3
3
  deepeval/annotation/__init__.py,sha256=ZFhUVNNuH_YgQSZJ-m5E9iUb9TkAkEV33a6ouMDZ8EI,111
4
4
  deepeval/annotation/annotation.py,sha256=3j3-syeJepAcEj3u3e4T_BeRDzNr7yXGDIoNQGMKpwQ,2298
5
5
  deepeval/annotation/api.py,sha256=EYN33ACVzVxsFleRYm60KB4Exvff3rPJKt1VBuuX970,2147
@@ -77,7 +77,7 @@ deepeval/benchmarks/bool_q/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJ
77
77
  deepeval/benchmarks/bool_q/bool_q.py,sha256=wJM4-wSybT8EwgDJVB4p3QYXGNzLD3tdrpGE1cNEz_E,5507
78
78
  deepeval/benchmarks/bool_q/template.py,sha256=pgNj4RR6-4VJDDySwnKt-MpghBCjVlZ7fPKY6PltllQ,4055
79
79
  deepeval/benchmarks/drop/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
80
- deepeval/benchmarks/drop/drop.py,sha256=Qo74clH8ERf1Zutcz76wrTbR-fbR2qHdsZP6fJ1W-LQ,12054
80
+ deepeval/benchmarks/drop/drop.py,sha256=rGcqd79-IfQ2tvPuAL6wrON4R0hBiVGBy1OtDRmertE,12042
81
81
  deepeval/benchmarks/drop/task.py,sha256=RV7DEXF192IOsY-yIVdlGb_y-A_sS5APPn8PGOPn5yU,17950
82
82
  deepeval/benchmarks/drop/template.py,sha256=1P0mx_71Bxr9juIA8nGpVRIrP8NSoDILkIicjWvqE94,1376
83
83
  deepeval/benchmarks/equity_med_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -88,7 +88,7 @@ deepeval/benchmarks/gsm8k/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJW
88
88
  deepeval/benchmarks/gsm8k/gsm8k.py,sha256=LyJQBskKuqp013LLO3539RJiRXqCmlGl12BPXvQ8P88,6974
89
89
  deepeval/benchmarks/gsm8k/template.py,sha256=3F7DwQwhJwKxtlbaO6TNvBBRaDEUBEp58JwirSjxtR0,1626
90
90
  deepeval/benchmarks/hellaswag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
91
- deepeval/benchmarks/hellaswag/hellaswag.py,sha256=ESX4A8WRxSeErPC34QuMM6WVEeDeyiCmNq10twZJxgw,11959
91
+ deepeval/benchmarks/hellaswag/hellaswag.py,sha256=_3felzBwQUhhRXk4D9NbcY8dme_qUQcwUjKGw9OtDJg,11972
92
92
  deepeval/benchmarks/hellaswag/task.py,sha256=LfO8T6bpNiwdM8VdubKrup7qje3-rHgu69iB6Sdsc6I,7323
93
93
  deepeval/benchmarks/hellaswag/template.py,sha256=TcCu25hkl89qbRwcEyRVGTGp7DU_5Eph754W2znk5QY,1279
94
94
  deepeval/benchmarks/human_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -102,15 +102,15 @@ deepeval/benchmarks/lambada/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMp
102
102
  deepeval/benchmarks/lambada/lambada.py,sha256=FExZLpDBgQfYe9o-MBS0LEy0-i4jHGeFHo8XCbMW_io,5556
103
103
  deepeval/benchmarks/lambada/template.py,sha256=mSn0Elvp34wTnvaAm3IENz0mfGSNM_iRx50hIouk4t0,3776
104
104
  deepeval/benchmarks/logi_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
105
- deepeval/benchmarks/logi_qa/logi_qa.py,sha256=zN7KTa77d1WkM_m9V-Z0AfHBLFZ70s89yMaQ3_CNV84,11130
105
+ deepeval/benchmarks/logi_qa/logi_qa.py,sha256=VNZGASigEDlJjzwGZtWG3OUs3v3P733GD84-h3TaxjU,11143
106
106
  deepeval/benchmarks/logi_qa/task.py,sha256=pVMLVHPyDFSyoIsnckBNRDt8FK0J317PiGT-0dpr7rs,350
107
107
  deepeval/benchmarks/logi_qa/template.py,sha256=EddGd2s3u2bPejogTcM50SDS7ynHnMhHaKuqQjjZoLk,4354
108
108
  deepeval/benchmarks/math_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
109
- deepeval/benchmarks/math_qa/math_qa.py,sha256=yRlVkUb_L4fPJIvaDo-5EB0XeVId9CxrIZUX7wLQJh4,10638
109
+ deepeval/benchmarks/math_qa/math_qa.py,sha256=_eP-yocJom9r91qmAUBbIH4hrWazEHLV2lDKu0yMfEI,10651
110
110
  deepeval/benchmarks/math_qa/task.py,sha256=3q_jlK5kIl5Zs0mQwuzxyvmPP6ncLZwszn7gtl1GfZs,192
111
111
  deepeval/benchmarks/math_qa/template.py,sha256=pC3PB2GGU5TQ81I7E76RJh0xlu7xiF6d4SK3T_Nksh8,4468
112
112
  deepeval/benchmarks/mmlu/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
113
- deepeval/benchmarks/mmlu/mmlu.py,sha256=KEiiRSfQQQSX51dcnrWPkQcud1WvcKe1v0T82P1NowY,11418
113
+ deepeval/benchmarks/mmlu/mmlu.py,sha256=flg3tb052DVo7wnfAHkW9n07tEEhHrkT2C0d5-UMBoQ,11431
114
114
  deepeval/benchmarks/mmlu/task.py,sha256=HnhnuD4Xjur9GlrBtswaR7ZPouGx4NTgbcFZu_oIzXw,2580
115
115
  deepeval/benchmarks/mmlu/template.py,sha256=MsdcrZWVkyZpEw--Kj6W7vjOJgig-ABiz9B3WtZz1MQ,1303
116
116
  deepeval/benchmarks/modes/__init__.py,sha256=IGhZp0-nmvVsZWBnTuBvKhdGiy4TJZShFSjYAeBZdbo,135
@@ -125,29 +125,33 @@ deepeval/benchmarks/truthful_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
125
125
  deepeval/benchmarks/truthful_qa/mode.py,sha256=66aXCTXGTbAprA33M3TT15OhpJAqxLPDzJuShKxiFwY,84
126
126
  deepeval/benchmarks/truthful_qa/task.py,sha256=PmfPbqINd9wizq8Tpk8pwms9TersoGlMGBqxpTmZhcc,1360
127
127
  deepeval/benchmarks/truthful_qa/template.py,sha256=5y6mfJm9AXnQL7xwrfsZjH080GwO1kd_1GdTzDCoYgo,4465
128
- deepeval/benchmarks/truthful_qa/truthful_qa.py,sha256=j8RzrAwgKVt2oWNf1bz9cPIk6b-I7WsjiPElJlQ21Xs,13768
128
+ deepeval/benchmarks/truthful_qa/truthful_qa.py,sha256=2r-xcFnzSSJds3ZGxYogBzjFFrCfJxYpXzKrpE8cC_c,13781
129
129
  deepeval/benchmarks/utils.py,sha256=NHImqH22mv108_CKM7ajTpu4hOeUhr5xPicbf0i2qGk,287
130
130
  deepeval/benchmarks/winogrande/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
131
131
  deepeval/benchmarks/winogrande/template.py,sha256=tDwH8NpNF9x7FbDmQw45XaW1LNqGBV6zP5pwV1uOlwM,2089
132
132
  deepeval/benchmarks/winogrande/winogrande.py,sha256=_4irJkRPw3c-Ufo-hM4cHpPKUoxozedFQpok9n0csTg,5644
133
133
  deepeval/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
134
134
  deepeval/cli/dotenv_handler.py,sha256=7PtVjCNUZKAXsVJQxznsLexad7y8x-gQ195xAxmv4gA,2468
135
- deepeval/cli/main.py,sha256=KqjnU5yKo5b4x-7eV89ZiB7L-9jmQ5Fitd8uOcj1zEA,57471
135
+ deepeval/cli/main.py,sha256=60FsOU9OGRX49eSSuJkePf8kArSor3-QbeqAZ2bDWCE,51298
136
136
  deepeval/cli/server.py,sha256=cOm9xiYcPYB9GDeFQw9-Iawf9bNfOqftZs7q7mO_P7I,1979
137
137
  deepeval/cli/test.py,sha256=kSIFMRTAfVzBJ4OitwvT829-ylV7UzPMP57P2DePS-Q,5482
138
138
  deepeval/cli/types.py,sha256=_7KdthstHNc-JKCWrfpDQCf_j8h9PMxh0qJCHmVXJr0,310
139
139
  deepeval/cli/utils.py,sha256=F4-yuONzk4ojDoSLjI9RYERB7HOD412iZ2lNlSCq4wk,5601
140
140
  deepeval/confident/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
141
- deepeval/confident/api.py,sha256=owp3a6a0JjOvv9wUCKRrkmMxpZEm6ek_3asBaQ7SjLk,7544
141
+ deepeval/confident/api.py,sha256=-2i3IBLtj5bUIImwOF6ltGVR3ZyViIbIC38XxwWvf54,8318
142
142
  deepeval/confident/types.py,sha256=-slFhDof_1maMgpLxqDRZv6kz6ZVY2hP_0uj_aveJKU,533
143
+ deepeval/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
144
+ deepeval/config/settings.py,sha256=e7sk6_3I14hG457e75DoJd9Ojo3rOkpBZzsMYlj4gKQ,18139
145
+ deepeval/config/settings_manager.py,sha256=PsBS_5dRJASak2AUDwjhjLSiezNz1fje0R3onoFCKC0,4014
146
+ deepeval/config/utils.py,sha256=EG2MwGlUEb7bGuZ77jUo7QLoMeqkcK5gl7Wgk-pcS4g,2443
143
147
  deepeval/constants.py,sha256=XNcTEdhtO4aDnQaHi815A_24iEE9o09Vxnthm7xlfnw,796
144
- deepeval/dataset/__init__.py,sha256=tSdj7ggqVRNihfp6BAejvW5RtGLK7bWAkdGbhqFxhHI,155
148
+ deepeval/dataset/__init__.py,sha256=rcum_VjBXu8eisCdr6sl84BgoZUs3x0tYbB2PnPtHGY,212
145
149
  deepeval/dataset/api.py,sha256=ZxkEqAF4nZH_Ys_1f5r9N2LFI_vBcAJxt8eJm7Mplpw,831
146
- deepeval/dataset/dataset.py,sha256=rL8TjzEjIdeH9iq1-dhfCmlJAQshSs5SZtowZFo3mzU,47075
150
+ deepeval/dataset/dataset.py,sha256=dkUDYtK1z9sDn6A-HOohoHHUobWuaCiuHEABLAfP4kQ,49396
147
151
  deepeval/dataset/golden.py,sha256=T-rTk4Hw1tANx_Iimv977F6Y4QK3s5OIB4PecU5FJDM,2338
152
+ deepeval/dataset/test_run_tracer.py,sha256=5CdpDvhzkEEBRyqWi6egocaxiN6IRS3XfbACxEQZQeM,2544
148
153
  deepeval/dataset/types.py,sha256=cwsAXeaxvd511Uc7-zQ0OxsrPPduCfUSKRbuziAwWhA,309
149
- deepeval/dataset/utils.py,sha256=JXFTF6CEHtAs7WHuXwcCxJldF7T_rKgktNDj9hi1GkI,5071
150
- deepeval/env.py,sha256=w6sM-_-qWzaZLUuIzXCvQ6iK7g4x8R4SEyi6fdcneFw,1018
154
+ deepeval/dataset/utils.py,sha256=kqzzM1kTtAmlt0kv4_6lLQ-oVR20PtcxdKE4NmoZFpM,5735
151
155
  deepeval/errors.py,sha256=_K5wywEw2gp0DDECyeY6UrSI9GgOtCBbfPxt5maIzSY,113
152
156
  deepeval/evaluate/__init__.py,sha256=315IaMiYEz7oJhZ4kPTBfeCNd1xF-wWVU6KOQnrKQpE,291
153
157
  deepeval/evaluate/api.py,sha256=rkblH0ZFAAdyuF0Ymh7JE1pIJPR9yFuPrn9SQaCEQp4,435
@@ -175,15 +179,15 @@ deepeval/integrations/llama_index/__init__.py,sha256=zBwUFQXDp6QFtp1cfANy8ucV08r
175
179
  deepeval/integrations/llama_index/agent/patched.py,sha256=4JbH0WQmt4lct7xxIH0phj8_Y-V35dgVv7DEDXK0jZI,2149
176
180
  deepeval/integrations/llama_index/handler.py,sha256=eqI1n8E4MsvfKoFs5Zrm9IdCR7g9eBgNedISs7UkU_I,8947
177
181
  deepeval/integrations/llama_index/utils.py,sha256=mxW71-3PjvBvJpLIU0kNWuTzCidy5l_-roLt8ZyWYA0,2599
178
- deepeval/integrations/pydantic_ai/__init__.py,sha256=04qKTNftGVzso3n7F1hvKxXI-NG3Enx8TP-EdpJykUc,134
179
- deepeval/integrations/pydantic_ai/agent.py,sha256=6PB_-zWaivQR65xi2F_WqBNtOcUwRpvI8wyCfpGhJJQ,13549
180
- deepeval/integrations/pydantic_ai/setup.py,sha256=-G15PEHZpLUsGNa7yP1IP2t3_6aKt5ac38930VnUYQY,1839
181
- deepeval/key_handler.py,sha256=HAcKqrmFlxidD_5-csg8k5G34hAbwEyWdsEY76oz-UI,6467
182
+ deepeval/integrations/pydantic_ai/__init__.py,sha256=36fBKBLRo1y5jFlj0Y4xhDJsiq4ZnqtmFO32R90Azo4,96
183
+ deepeval/integrations/pydantic_ai/otel.py,sha256=2DpO3RapdztXPlT9BWhQfF4dJDMyp2X7YvuplJ0SwC8,1661
184
+ deepeval/integrations/pydantic_ai/patcher.py,sha256=wszU2YROZAQovyz1ZNRvTtsuJ5By_x4SF6yjtmItcNk,12210
185
+ deepeval/key_handler.py,sha256=damdQEBLGy4IVk5DR5-E3blIZdLbcMtyeGAFn_4_SG4,6505
182
186
  deepeval/metrics/__init__.py,sha256=xofaK_bJq0QCSerSWYjHYRXXch9YQwZHxIfVAv1G7fo,4012
183
187
  deepeval/metrics/answer_relevancy/__init__.py,sha256=WbZUpoSg2GQoqJ4VIRirVVQ1JDx5xwT-RskwqNKfWGM,46
184
188
  deepeval/metrics/answer_relevancy/answer_relevancy.py,sha256=vlc7BzUAtYVW62d5Qa-fIHSLOX239KFwCE7fCGP8jGE,10935
185
189
  deepeval/metrics/answer_relevancy/schema.py,sha256=N8wIBh4qwk4-BZOEyPJM-MB2_0dbkqXHv0aCfsIkROo,405
186
- deepeval/metrics/answer_relevancy/template.py,sha256=gV5iVF1zb8b-V70y-mTkrMzldVIFzqgwqzY1-ZXi4z0,4900
190
+ deepeval/metrics/answer_relevancy/template.py,sha256=vU6yAsiCYtvx5S1g74WeEdJmuGvd2ZtwDDqM5-jfYkM,5174
187
191
  deepeval/metrics/arena_g_eval/__init__.py,sha256=pVDIsWD_DLumOLegJrVSozcWwzsaxJXE5cIN7KxCzws,37
188
192
  deepeval/metrics/arena_g_eval/arena_g_eval.py,sha256=B4Gjct3w5VGPxmumBblFVajdUIdWJTNR0hvMuhgIFg0,11661
189
193
  deepeval/metrics/arena_g_eval/schema.py,sha256=3wipvUpZNO0O4QuWFy1LaLenfTYxLKldCERmP3sVtYI,288
@@ -232,7 +236,7 @@ deepeval/metrics/dag/utils.py,sha256=66D88fpjIUdVwZvYV8a1L9TlX1wvbCVuE6Y8BFTbpkE
232
236
  deepeval/metrics/faithfulness/__init__.py,sha256=RffAtTOSdtWO1gHVMnPI-imJahf3JENOoJRiNw-Xv4g,43
233
237
  deepeval/metrics/faithfulness/faithfulness.py,sha256=bYVhHI7Tr7xH0x-7F2LijxRuCCEtLOnXLzncvJLVv60,12887
234
238
  deepeval/metrics/faithfulness/schema.py,sha256=2dU9dwwmqpGJcWvY2webERWIfH_tn02xgLghHkAY_eM,437
235
- deepeval/metrics/faithfulness/template.py,sha256=Ya5s1q26Nku9CuWvedugGH7k6c7nVs45BSEk3BSiy6s,6643
239
+ deepeval/metrics/faithfulness/template.py,sha256=q5NvVBcUEZgyMy_1zHFGtDNU7PoREFJGOkVQbZf8r-g,7117
236
240
  deepeval/metrics/g_eval/__init__.py,sha256=HAhsQFVq9LIpZXPN00Jc_WrMXrh47NIT86VnUpWM4_4,102
237
241
  deepeval/metrics/g_eval/g_eval.py,sha256=JI3rTaEClYgiL9oLaVFh7sunqGoXI7qBeBgi9RkSwDs,14327
238
242
  deepeval/metrics/g_eval/schema.py,sha256=V629txuDrr_2IEKEsgJVYYZb_pkdfcltQV9ZjvxK5co,287
@@ -284,7 +288,7 @@ deepeval/metrics/multimodal_metrics/image_reference/template.py,sha256=GdBGZu6at
284
288
  deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
285
289
  deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py,sha256=KLvElO0wcsVZ3OSpRlfjI7xzZYj_9eFfK0tg4ssNmc0,12379
286
290
  deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py,sha256=Tqo-qN94CV4ZB6n8I906elt-PL9aUk0ioeuu-KvIQc0,382
287
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py,sha256=16O1P4eB8n_VyW3QiEzCsLjZpLjjmadg7oS_HLQrH5Y,5068
291
+ deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py,sha256=gb9NSVwGp2xI34BvNdY7AccIcoRZKHdP0Ku7JPPHFzs,5439
288
292
  deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
289
293
  deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py,sha256=YUBm40GlvVsGM-4PNXrc4DtFSgvvYBl66yVf9iEVs9c,10656
290
294
  deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py,sha256=Y0kOAfaaKCBmnvBPZXJLZK_C5FAWDyJC8fYbnNQx3eA,305
@@ -300,7 +304,7 @@ deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py,
300
304
  deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
301
305
  deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py,sha256=ZufEUHbgeUyDkCXWR37R-7piiF7xrWvgv6hROOigVIQ,13161
302
306
  deepeval/metrics/multimodal_metrics/multimodal_faithfulness/schema.py,sha256=b-WtfA7zq4TgQiuqqNEMf7jmohnWBMW4opChHyg49Gc,414
303
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py,sha256=H4Y9OYCmyFbXIb5xbtytVZGOyP5s3t1SlG7uUKW6Axo,8076
307
+ deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py,sha256=9EWRC-Wiyr_UEMPfpuTcX2tvsjPxSRY4n_lClcsK6vw,8389
304
308
  deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
305
309
  deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py,sha256=gZ7Q4vF12PLGhbHhOUAl9LIFWDOc9-GKhu3ly_LOkQ0,13997
306
310
  deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py,sha256=H_9-iA1BXJwbPKrGEZBqxDO_En4sjXI8_xKSNYc-hnk,167
@@ -342,7 +346,7 @@ deepeval/metrics/task_completion/schema.py,sha256=JfnZkbCh7skWvrESy65GEo6Rvo0FDJ
342
346
  deepeval/metrics/task_completion/task_completion.py,sha256=RKFkXCVOhO70I8A16zv5BCaV3QVKldNxawJ0T93U_Zc,8978
343
347
  deepeval/metrics/task_completion/template.py,sha256=4xjTBcGrPQxInbf8iwJOZyok9SQex1aCkbxKmfkXoA4,10437
344
348
  deepeval/metrics/tool_correctness/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
345
- deepeval/metrics/tool_correctness/tool_correctness.py,sha256=mMOhdnFdRV7L3SB8Vsyqmp-GiebbixscBRlUi4JjsMI,11960
349
+ deepeval/metrics/tool_correctness/tool_correctness.py,sha256=8uyNFGM_aGFAB2aCv2CVDg5cjj0OJe8UVDqaT3Gp3kU,12090
346
350
  deepeval/metrics/toxicity/__init__.py,sha256=1lgt8BKxfBDd7bfSLu_5kMzmsr9b2_ahPK9oq5zLkMk,39
347
351
  deepeval/metrics/toxicity/schema.py,sha256=7uUdzXqTvIIz5nfahlllo_fzVRXg7UeMeXn7Hl32pKY,459
348
352
  deepeval/metrics/toxicity/template.py,sha256=zl4y4Tg9gXkxKJ8aXVwj0cJ94pvfYuP7MTeV3dvB5yQ,5045
@@ -364,22 +368,23 @@ deepeval/models/embedding_models/ollama_embedding_model.py,sha256=kmJEVmo6nHdoDg
364
368
  deepeval/models/embedding_models/openai_embedding_model.py,sha256=n1ER4gb4PGE7J7HICNoHAync9w9VaSl4tiU0d36T6Fs,2249
365
369
  deepeval/models/hallucination_model.py,sha256=ABi978VKLE_jNHbDzM96kJ08EsZ5ZlvOlJHA_ptSkfQ,1003
366
370
  deepeval/models/llms/__init__.py,sha256=qmvv7wnmTDvys2uUTwQRo-_3DlFV3fGLiewPeQYRsAI,670
367
- deepeval/models/llms/amazon_bedrock_model.py,sha256=FCLaxtfeFNkWspgrE_4-F7JLgCSw-AaHOS45Hmx3Ffk,4704
371
+ deepeval/models/llms/amazon_bedrock_model.py,sha256=0AxdftPxYs86y2ILbRYhWRP38vP_yZl8E-ZrSLIlrOQ,5373
368
372
  deepeval/models/llms/anthropic_model.py,sha256=T55-jKRbM3_B3Db9M3ruklm2cVVU1JDGAzicLTCBtgU,5151
369
373
  deepeval/models/llms/azure_model.py,sha256=MG6sVGUgIy2RURwFWvRP7O_RF6QAg2dpqXIJhIsgY60,10994
370
374
  deepeval/models/llms/deepseek_model.py,sha256=mz0U0uqazAVr8vv8SF74GRTr4ZEVc3Q1v9o5TVbmz_8,5440
371
375
  deepeval/models/llms/gemini_model.py,sha256=QXf9mjopfWwJxpm0gbkXo6op_Wtu1GaIt1BfzS3OU8Q,8174
372
- deepeval/models/llms/grok_model.py,sha256=S_ROLB3NWWyyY5tzezln1xifjWRgwdVGOraIpQPjF2Q,5987
373
- deepeval/models/llms/kimi_model.py,sha256=1axzi_DgtVDno8tb9U8FrRbqKK6LSeL7sQEuQqEsM5A,6727
376
+ deepeval/models/llms/grok_model.py,sha256=zPBmPnNCRGrtg_709gFv5A4iz7WilTmDpAyOpjXTa_M,5986
377
+ deepeval/models/llms/kimi_model.py,sha256=ZcvEwWgnv1dtmbq7LgMQJAjpkjxZr-l5eBi9KGqRbb0,6726
374
378
  deepeval/models/llms/litellm_model.py,sha256=iu4-_JCpd9LdEa-eCWseD2iLTA-r7OSgYGWQ0IxB4eA,11527
375
379
  deepeval/models/llms/local_model.py,sha256=PeF6ofMR8jBMTLzkCkgmkBJix9kHbWV5vTKGx8nehFs,3605
376
380
  deepeval/models/llms/ollama_model.py,sha256=foL6sMza37Z0HH8qPStyIr1g-xEaD6Ce53L2C8Er-P8,3055
377
- deepeval/models/llms/openai_model.py,sha256=UKQ7xXSsWfDos4EIYbPTx1Np3yvNuqmfY1d1g2_T148,17526
381
+ deepeval/models/llms/openai_model.py,sha256=84c_g6jegbQwncrqYYaodJqDgj1SIfoSy2p44ghZfR8,17231
378
382
  deepeval/models/llms/utils.py,sha256=ZMZ02kjXAAleq0bIEyjj-gZwe6Gp0b0mK8YMuid2-20,722
379
383
  deepeval/models/mlllms/__init__.py,sha256=19nN6kUB5XI0nUWUQX0aD9GBUMM8WWGvsDgKjuT4EF4,144
380
384
  deepeval/models/mlllms/gemini_model.py,sha256=bWJe5x_ayHvxtZffKFcrFWkc4_W-hcLu8ErFjecoF_0,9593
381
385
  deepeval/models/mlllms/ollama_model.py,sha256=3i79vflDPS1f4R105VMi26SwlZVkyQFCTJT4OJtU5q4,3823
382
386
  deepeval/models/mlllms/openai_model.py,sha256=q1pe6UfQ39IUFrPSRYyM1h-kz4L5YNkqtzHQsNqAl9M,9786
387
+ deepeval/models/retry_policy.py,sha256=AQWACiUfs8ud-etXUMhd0NKxB6NmDlGvHmTcbT6SxS8,9072
383
388
  deepeval/models/summac_model.py,sha256=wKeH7pWQRXrTlzlIw_r1YCb8b7jUhWq6jUz9FiNUCSg,1992
384
389
  deepeval/models/unbias_model.py,sha256=umOMhQLTmnD7uOuhiQufEl4Wlti4q2s3EtKOpds7zhs,597
385
390
  deepeval/models/utils.py,sha256=-3XDgg1U7PZ0jpLFiYXxqdBhp7idvlo7RPZv5SoD8lc,1130
@@ -388,7 +393,7 @@ deepeval/openai/extractors.py,sha256=q062nlYKuPVwqfLFYCD1yWv7xHF1U_XrYdAp5ve2l_E
388
393
  deepeval/openai/patch.py,sha256=tPDqXaBScBJveM9P5xLT_mVwkubw0bOey-efvdjZIfg,7466
389
394
  deepeval/openai/utils.py,sha256=-84VZGUsnzRkYAFWc_DGaGuQTDCUItk0VtUTdjtSxg4,2748
390
395
  deepeval/openai_agents/__init__.py,sha256=u-e9laod3LyPfLcI5lr7Yhk8ArfWvlpr-D4_idWIt0A,321
391
- deepeval/openai_agents/agent.py,sha256=OrhQbeeRZ8PUPdzQT_WT1PtjR56PpZu4q-zp6vY4qK8,5490
396
+ deepeval/openai_agents/agent.py,sha256=lgwc9pXhJn1xbytl2sy58aZX-gIsFP87fhImXnV9EJU,5564
392
397
  deepeval/openai_agents/callback_handler.py,sha256=AXnL9teO8txUwK3cxBpqFTki5ZWOEy6kSglLjrnCNyA,3264
393
398
  deepeval/openai_agents/extractors.py,sha256=0jZxwgY1NQ3mMxVWPpLcMpKlbj-aYV7rwuzRzG8hdZs,11529
394
399
  deepeval/openai_agents/patch.py,sha256=zSmRV5yOReHC6IylhT93SM1nQpmH3sEWfYcJqa_iM84,3684
@@ -432,7 +437,7 @@ deepeval/test_case/mcp.py,sha256=Z625NLvz0E_UJpbyfyuAi_4nsqKH6DByBf0rfKd70xU,187
432
437
  deepeval/test_case/mllm_test_case.py,sha256=8a0YoE72geX_fLI6yk_cObSxCPddwW-DOb-5OPE1-W8,5414
433
438
  deepeval/test_case/utils.py,sha256=5lT7QmhItsQHt44-qQfspuktilcrEyvl2cS0cgUJxds,809
434
439
  deepeval/test_run/__init__.py,sha256=3npHzm1Z96M2qFVtm22-RTfrVeQs27jKI-RmSWM2nh0,747
435
- deepeval/test_run/api.py,sha256=lde9ETzoCEQuXk8Wjwws1GrxmcCwW0ZOdiK4uTE1_aE,5348
440
+ deepeval/test_run/api.py,sha256=ztXrcMsz0gDebd--IqQrw_gEEBlrkxnAk-WxpUFJQg0,5427
436
441
  deepeval/test_run/cache.py,sha256=2bKXQ4GvHe2xowFGz4ekoheLno1Yv7O6PfuanFkWnA4,12786
437
442
  deepeval/test_run/hooks.py,sha256=Qnd06bk9RJN4WmFUzJrBAi3Xj261hzyzI2iRmG8wbKw,375
438
443
  deepeval/test_run/hyperparameters.py,sha256=f7M07w1EfT8YPtiD9xVIVYa3ZewkxewSkK7knwv0YlY,2289
@@ -446,16 +451,16 @@ deepeval/tracing/offline_evals/span.py,sha256=pXqTVXs-WnjRVpCYYEbNe0zSM6Wz9GsKHs
446
451
  deepeval/tracing/offline_evals/thread.py,sha256=bcSGFcZJKnszArOLIlWvnCyt0zSmsd7Xsw5rl4RTVFg,1981
447
452
  deepeval/tracing/offline_evals/trace.py,sha256=vTflaTKysKRiYvKA-Nx6PUJ3J6NrRLXiIdWieVcm90E,1868
448
453
  deepeval/tracing/otel/__init__.py,sha256=HQsaF5yLPwyW5qg8AOV81_nG_7pFHnatOTHi9Wx3HEk,88
449
- deepeval/tracing/otel/exporter.py,sha256=qNtcf4E6RIffGdVdceLPHOFopXdoRUHCgdlM6JEhG50,25806
450
- deepeval/tracing/otel/utils.py,sha256=joikBEYWr8LpoKV77QiwRvOA1hDcGVbiCbWbIOzZ9g0,7673
454
+ deepeval/tracing/otel/exporter.py,sha256=dXQd834zm5rm1ss9pWkBBlk-JSdtiw7aFLso2hM53XY,26372
455
+ deepeval/tracing/otel/utils.py,sha256=g8yAzhqbPh1fOKCWkfNekC6AVotLfu1SUcfNMo6zii8,9786
451
456
  deepeval/tracing/patchers.py,sha256=DAPNkhrDtoeyJIVeQDUMhTz-xGcXu00eqjQZmov8FiU,3096
452
- deepeval/tracing/perf_epoch_bridge.py,sha256=9g5bdQMKrhXtlS5LgC-oDNE7foe307DE07NqkusW6GU,1800
453
- deepeval/tracing/tracing.py,sha256=5g201dSEfsJwe_71dOIagG7mTrtPry6W42GHyL6wdvs,38048
457
+ deepeval/tracing/perf_epoch_bridge.py,sha256=iyAPddB6Op7NpMtPHJ29lDm53Btz9yLaN6xSCfTRQm4,1825
458
+ deepeval/tracing/tracing.py,sha256=StvwFEG3MG67n7PBEyDDycdj0myMbP3LMB_FBhaZH-Y,38741
454
459
  deepeval/tracing/types.py,sha256=3w5HEI6y4zuzVr8xGEEzDviLZCX_s_pK85qbwnyf1aY,5196
455
- deepeval/tracing/utils.py,sha256=-qTozfqP7NPZfonwURJAtcLDdz94_7z722Mv64YeuBs,3354
456
- deepeval/utils.py,sha256=fFIuSK3fsjbKeYHYpt2JzfbU6f6Sw0Vv634_Y9Jk4pw,15382
457
- deepeval-3.4.8.dist-info/LICENSE.md,sha256=0ATkuLv6QgsJTBODUHC5Rak_PArA6gv2t7inJzNTP38,11352
458
- deepeval-3.4.8.dist-info/METADATA,sha256=oD4K-H3b3ciwjTtFppJKbs8asai4_cZzjVT9BSFwA4Y,18639
459
- deepeval-3.4.8.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
460
- deepeval-3.4.8.dist-info/entry_points.txt,sha256=fVr8UphXTfJe9I2rObmUtfU3gkSrYeM0pLy-NbJYg10,94
461
- deepeval-3.4.8.dist-info/RECORD,,
460
+ deepeval/tracing/utils.py,sha256=eTEickbDvRiOu1twNolh4sHnjZF49vqdLgI74BudeTw,6357
461
+ deepeval/utils.py,sha256=EimWDwI1pKCE8vl6kuTnGbGT6ep9zHL5sZ0o-gj49XI,16857
462
+ deepeval-3.5.0.dist-info/LICENSE.md,sha256=0ATkuLv6QgsJTBODUHC5Rak_PArA6gv2t7inJzNTP38,11352
463
+ deepeval-3.5.0.dist-info/METADATA,sha256=KBAB5m11q4GAhVwCJBmXZDtaYtKoAO3sQ0vg-ajFRLg,18682
464
+ deepeval-3.5.0.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
465
+ deepeval-3.5.0.dist-info/entry_points.txt,sha256=fVr8UphXTfJe9I2rObmUtfU3gkSrYeM0pLy-NbJYg10,94
466
+ deepeval-3.5.0.dist-info/RECORD,,
deepeval/env.py DELETED
@@ -1,35 +0,0 @@
1
- from __future__ import annotations
2
- import os
3
-
4
- try:
5
- from dotenv import load_dotenv, find_dotenv # type: ignore
6
- except Exception:
7
- load_dotenv = None
8
- find_dotenv = None
9
-
10
-
11
- def autoload_dotenv() -> None:
12
- """
13
- Autoload environment variables for DeepEval at import time.
14
-
15
- Precedence from highest -> lowest:
16
- 1) Existing process environment variables
17
- 2) .env.local (from current working directory)
18
- 3) .env (from current working directory)
19
-
20
- Behavior:
21
- - Loads .env.local then .env if present, without overriding existing vars.
22
- - Opt-out by setting DEEPEVAL_DISABLE_DOTENV=1.
23
- - Soft-fails cleanly if python-dotenv is not installed.
24
- """
25
- if os.getenv("DEEPEVAL_DISABLE_DOTENV") == "1":
26
- return
27
-
28
- if not (load_dotenv and find_dotenv):
29
- return
30
-
31
- for name in (".env.local", ".env"):
32
- path = find_dotenv(name, usecwd=True)
33
- if path:
34
- # Don't override previously set values
35
- load_dotenv(path, override=False)
deepeval/integrations/pydantic_ai/agent.py DELETED
@@ -1,364 +0,0 @@
1
- from deepeval.telemetry import capture_tracing_integration
2
- from deepeval.metrics import BaseMetric
3
- from typing import List, Optional
4
- import functools
5
- import inspect
6
- import json
7
- from deepeval.test_case import LLMTestCase
8
- from deepeval.tracing.types import TestCaseMetricPair
9
- from deepeval.tracing.tracing import trace_manager
10
- from deepeval.tracing.otel.utils import parse_string, parse_list_of_strings
11
- from opentelemetry import trace
12
-
13
- try:
14
- from opentelemetry.trace import NoOpTracer
15
-
16
- opentelemetry_installed = True
17
- except:
18
- opentelemetry_installed = False
19
-
20
-
21
- def is_opentelemetry_available():
22
- if not opentelemetry_installed:
23
- raise ImportError(
24
- "OpenTelemetry SDK is not available. Please install it with `pip install opentelemetry-sdk`."
25
- )
26
- return True
27
-
28
-
29
- try:
30
- from pydantic_ai.agent import Agent
31
- from pydantic_ai.models.instrumented import InstrumentedModel
32
-
33
- pydantic_ai_installed = True
34
- except:
35
- pydantic_ai_installed = False
36
-
37
-
38
- def is_pydantic_ai_installed():
39
- if not pydantic_ai_installed:
40
- raise ImportError(
41
- "Pydantic AI is not installed. Please install it with `pip install pydantic-ai`."
42
- )
43
-
44
-
45
- class PydanticAIAgent(Agent):
46
- def __init__(self, *args, **kwargs):
47
- with capture_tracing_integration("pydantic_ai.agent.PydanticAIAgent"):
48
- is_pydantic_ai_installed()
49
- is_opentelemetry_available()
50
-
51
- super().__init__(*args, **kwargs)
52
-
53
- # attributes to be set if ran synchronously
54
- self.metric_collection: str = None
55
- self.metrics: list[BaseMetric] = None
56
-
57
- # trace attributes to be set if ran synchronously
58
- self._trace_name: str = None
59
- self._trace_tags: list[str] = None
60
- self._trace_metadata: dict = None
61
- self._trace_thread_id: str = None
62
- self._trace_user_id: str = None
63
-
64
- # Patch the run method only for this instance
65
- self._patch_run_method()
66
- self._patch_run_method_sync()
67
- self._patch_tool_decorator()
68
-
69
- def _patch_tool_decorator(self):
70
- """Patch the tool decorator to print input and output"""
71
- original_tool = self.tool
72
-
73
- @functools.wraps(original_tool)
74
- def patched_tool(
75
- *args,
76
- metric_collection: Optional[str] = None,
77
- metrics: Optional[List[BaseMetric]] = None,
78
- **kwargs
79
- ):
80
-
81
- # Check if function is in args (direct decoration: @agent.tool)
82
- if args and callable(args[0]):
83
- original_func = args[0]
84
- patched_func = self._create_patched_function(
85
- original_func, metric_collection, metrics
86
- )
87
- new_args = (patched_func,) + args[1:]
88
- result = original_tool(*new_args, **kwargs)
89
- return result
90
- else:
91
- # Decorator called with parameters: @agent.tool(metric_collection="...")
92
- # Return a decorator that will receive the function
93
- def decorator_with_params(func):
94
- patched_func = self._create_patched_function(
95
- func, metric_collection, metrics
96
- )
97
- return original_tool(patched_func, **kwargs)
98
-
99
- return decorator_with_params
100
-
101
- # Replace the tool method for this instance
102
- self.tool = patched_tool
103
-
104
- def _create_patched_function(
105
- self, original_func, metric_collection, metrics
106
- ):
107
- """Create a patched version of the function that adds tracing"""
108
- if inspect.iscoroutinefunction(original_func):
109
-
110
- @functools.wraps(original_func)
111
- async def patched_async_func(*func_args, **func_kwargs):
112
- result = await original_func(*func_args, **func_kwargs)
113
-
114
- current_span = trace.get_current_span()
115
- if current_span.is_recording():
116
- try:
117
- result_str = str(result)
118
- except Exception:
119
- result_str = ""
120
- current_span.set_attribute(
121
- "confident.span.output", result_str
122
- )
123
- if metric_collection:
124
- current_span.set_attribute(
125
- "confident.span.metric_collection",
126
- metric_collection,
127
- )
128
- # TODO: add metrics in component level evals
129
- return result
130
-
131
- return patched_async_func
132
- else:
133
-
134
- @functools.wraps(original_func)
135
- def patched_sync_func(*func_args, **func_kwargs):
136
- result = original_func(*func_args, **func_kwargs)
137
-
138
- current_span = trace.get_current_span()
139
- if current_span.is_recording():
140
- try:
141
- result_str = str(result)
142
- except Exception:
143
- result_str = ""
144
- current_span.set_attribute(
145
- "confident.span.output", result_str
146
- )
147
- if metric_collection:
148
- current_span.set_attribute(
149
- "confident.span.metric_collection",
150
- metric_collection,
151
- )
152
- # TODO: add metrics in component level evals
153
- return result
154
-
155
- return patched_sync_func
156
-
157
def _patch_run_method(self):
    """Replace ``self.run`` on this instance with a span-recording wrapper.

    The wrapper accepts the extra keyword-only arguments
    ``metric_collection``, ``metrics``, ``trace_name``, ``trace_tags``,
    ``trace_metadata``, ``trace_thread_id`` and ``trace_user_id``. They are
    consumed by the wrapper and are not forwarded to the original ``run``.

    The original coroutine is awaited inside an ``"agent"`` span, and the
    agent name, input, output, metric collection and trace attributes are
    written to that span. When metrics are available, a
    ``TestCaseMetricPair`` is appended to ``trace_manager.test_case_metrics``.

    Only the instance attribute is replaced; the class is left untouched.
    """
    original_run = self.run

    @functools.wraps(original_run)
    async def patched_run(
        *args,
        metric_collection=None,
        metrics=None,
        trace_name=None,
        trace_tags=None,
        trace_metadata=None,
        trace_thread_id=None,
        trace_user_id=None,
        **kwargs
    ):
        """Run the agent inside an "agent" span and annotate that span.

        Raises:
            TypeError: if ``metrics`` is not a list of ``BaseMetric``
                instances, or ``trace_metadata`` is not a dict.
        """
        # Normalise the flattened arguments with the shared parse helpers
        # (presumably they validate/coerce the values; see their definitions).
        metric_collection = parse_string(metric_collection)
        trace_name = parse_string(trace_name)
        trace_tags = parse_list_of_strings(trace_tags)
        trace_thread_id = parse_string(trace_thread_id)
        trace_user_id = parse_string(trace_user_id)

        if metrics is not None and not (
            isinstance(metrics, list)
            and all(isinstance(m, BaseMetric) for m in metrics)
        ):
            raise TypeError("metrics must be a list of BaseMetric instances")

        if trace_metadata is not None and not isinstance(trace_metadata, dict):
            raise TypeError("trace_metadata must be a dictionary")

        model = kwargs.get("model", None)
        infer_name = kwargs.get("infer_name", True)

        if infer_name and self.name is None:
            self._infer_name(inspect.currentframe())
        model_used = self._get_model(model)
        del model

        # Only an instrumented model carries a tracer; otherwise spans are
        # created on a no-op tracer.
        if isinstance(model_used, InstrumentedModel):
            tracer = model_used.instrumentation_settings.tracer
        else:
            tracer = NoOpTracer()

        with tracer.start_as_current_span("agent") as run_span:
            result = await original_run(*args, **kwargs)

            agent_name = str(self.name) if self.name else "agent"

            # The prompt may arrive positionally or as ``user_prompt=...``.
            # Indexing ``args[0]`` unconditionally raised IndexError after
            # the run had already completed when no positional was given.
            prompt = args[0] if args else kwargs.get("user_prompt")
            span_input = ""
            if isinstance(prompt, str) or (
                isinstance(prompt, list)
                and all(isinstance(part, str) for part in prompt)
            ):
                span_input = prompt

            # Best effort: a result without a usable ``output`` is recorded
            # as an empty string rather than failing the run.
            output = ""
            try:
                output = str(result.output)
            except Exception:
                pass

            # set agent span attributes
            run_span.set_attribute("confident.span.type", "agent")
            run_span.set_attribute("confident.agent.name", agent_name)
            run_span.set_attribute("confident.agent.input", span_input)
            run_span.set_attribute("confident.agent.output", output)

            # fallback for input and output not being set
            run_span.set_attribute("confident.span.input", span_input)
            run_span.set_attribute("confident.span.output", output)

            # The per-call argument wins; the instance attribute is the
            # fallback (it is also how run_sync hands its value over).
            active_collection = metric_collection or self.metric_collection
            if active_collection:
                run_span.set_attribute(
                    "confident.span.metric_collection", active_collection
                )

            # Per-call trace attributes are written first, then the values
            # stored on the instance (set by run_sync), so a truthy
            # instance value overrides the per-call one for the same key.
            trace_sources = (
                (
                    trace_name,
                    trace_tags,
                    trace_metadata,
                    trace_thread_id,
                    trace_user_id,
                ),
                (
                    self._trace_name,
                    self._trace_tags,
                    self._trace_metadata,
                    self._trace_thread_id,
                    self._trace_user_id,
                ),
            )
            for name, tags, metadata, thread_id, user_id in trace_sources:
                if name:
                    run_span.set_attribute("confident.trace.name", name)
                if tags:
                    run_span.set_attribute("confident.trace.tags", tags)
                if metadata:
                    # Metadata is a dict, so it is stored as a JSON string.
                    run_span.set_attribute(
                        "confident.trace.metadata", json.dumps(metadata)
                    )
                if thread_id:
                    run_span.set_attribute(
                        "confident.trace.thread_id", thread_id
                    )
                if user_id:
                    run_span.set_attribute("confident.trace.user_id", user_id)

            # Per-call metrics win; instance metrics are the fallback.
            active_metrics = metrics or self.metrics
            if active_metrics:
                trace_manager.test_case_metrics.append(
                    TestCaseMetricPair(
                        test_case=LLMTestCase(
                            input=span_input, actual_output=output
                        ),
                        metrics=active_metrics,
                    )
                )

        return result

    # Replace the method only for this instance
    self.run = patched_run
311
-
312
def _patch_run_method_sync(self):
    """Replace ``self.run_sync`` on this instance with a wrapper.

    The wrapper accepts the extra keyword-only arguments
    ``metric_collection``, ``metrics``, ``trace_name``, ``trace_tags``,
    ``trace_metadata``, ``trace_thread_id`` and ``trace_user_id``. They are
    not forwarded to the original ``run_sync``; instead they are stored on
    the instance for the duration of the call.

    Only the instance attribute is replaced; the class is left untouched.
    """
    original_run = self.run_sync

    @functools.wraps(original_run)
    def patched_run(
        *args,
        metric_collection=None,
        metrics=None,
        trace_name=None,
        trace_tags=None,
        trace_metadata=None,
        trace_thread_id=None,
        trace_user_id=None,
        **kwargs
    ):
        """Run synchronously with per-call trace/metric overrides applied.

        Raises:
            TypeError: if ``metrics`` is not a list of ``BaseMetric``
                instances, or ``trace_metadata`` is not a dict.
        """
        # Normalise the flattened arguments with the shared parse helpers
        # (presumably they validate/coerce the values; see their definitions).
        metric_collection = parse_string(metric_collection)
        trace_name = parse_string(trace_name)
        trace_tags = parse_list_of_strings(trace_tags)
        trace_thread_id = parse_string(trace_thread_id)
        trace_user_id = parse_string(trace_user_id)

        if metrics is not None and not (
            isinstance(metrics, list)
            and all(isinstance(m, BaseMetric) for m in metrics)
        ):
            raise TypeError("metrics must be a list of BaseMetric instances")

        if trace_metadata is not None and not isinstance(trace_metadata, dict):
            raise TypeError("trace_metadata must be a dictionary")

        # attributes to be set if ran synchronously
        overrides = {
            "_trace_name": trace_name,
            "_trace_tags": trace_tags,
            "_trace_metadata": trace_metadata,
            "_trace_thread_id": trace_thread_id,
            "_trace_user_id": trace_user_id,
        }
        # Metric settings replace the instance values only when supplied.
        if metric_collection:
            overrides["metric_collection"] = metric_collection
        if metrics:
            overrides["metrics"] = metrics

        # Remember what was there so one call's overrides do not leak into
        # later run()/run_sync() calls or clobber instance configuration.
        previous = {attr: getattr(self, attr, None) for attr in overrides}
        for attr, value in overrides.items():
            setattr(self, attr, value)

        try:
            return original_run(*args, **kwargs)
        finally:
            for attr, value in previous.items():
                setattr(self, attr, value)

    # Replace the method only for this instance
    self.run_sync = patched_run