deepeval 3.4.8__py3-none-any.whl → 3.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/__init__.py +8 -5
- deepeval/_version.py +1 -1
- deepeval/benchmarks/drop/drop.py +2 -3
- deepeval/benchmarks/hellaswag/hellaswag.py +2 -2
- deepeval/benchmarks/logi_qa/logi_qa.py +2 -2
- deepeval/benchmarks/math_qa/math_qa.py +2 -2
- deepeval/benchmarks/mmlu/mmlu.py +2 -2
- deepeval/benchmarks/truthful_qa/truthful_qa.py +2 -2
- deepeval/cli/main.py +561 -727
- deepeval/confident/api.py +30 -14
- deepeval/config/__init__.py +0 -0
- deepeval/config/settings.py +565 -0
- deepeval/config/settings_manager.py +133 -0
- deepeval/config/utils.py +86 -0
- deepeval/dataset/__init__.py +1 -0
- deepeval/dataset/dataset.py +70 -10
- deepeval/dataset/test_run_tracer.py +82 -0
- deepeval/dataset/utils.py +23 -0
- deepeval/integrations/pydantic_ai/__init__.py +2 -4
- deepeval/integrations/pydantic_ai/{setup.py → otel.py} +0 -8
- deepeval/integrations/pydantic_ai/patcher.py +376 -0
- deepeval/key_handler.py +1 -0
- deepeval/metrics/answer_relevancy/template.py +7 -2
- deepeval/metrics/faithfulness/template.py +11 -8
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +6 -4
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +6 -4
- deepeval/metrics/tool_correctness/tool_correctness.py +7 -3
- deepeval/models/llms/amazon_bedrock_model.py +24 -3
- deepeval/models/llms/grok_model.py +1 -1
- deepeval/models/llms/kimi_model.py +1 -1
- deepeval/models/llms/openai_model.py +37 -41
- deepeval/models/retry_policy.py +280 -0
- deepeval/openai_agents/agent.py +4 -2
- deepeval/test_run/api.py +1 -0
- deepeval/tracing/otel/exporter.py +20 -8
- deepeval/tracing/otel/utils.py +57 -0
- deepeval/tracing/perf_epoch_bridge.py +4 -4
- deepeval/tracing/tracing.py +37 -16
- deepeval/tracing/utils.py +98 -1
- deepeval/utils.py +111 -70
- {deepeval-3.4.8.dist-info → deepeval-3.5.0.dist-info}/METADATA +16 -13
- {deepeval-3.4.8.dist-info → deepeval-3.5.0.dist-info}/RECORD +45 -40
- deepeval/env.py +0 -35
- deepeval/integrations/pydantic_ai/agent.py +0 -364
- {deepeval-3.4.8.dist-info → deepeval-3.5.0.dist-info}/LICENSE.md +0 -0
- {deepeval-3.4.8.dist-info → deepeval-3.5.0.dist-info}/WHEEL +0 -0
- {deepeval-3.4.8.dist-info → deepeval-3.5.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
deepeval/__init__.py,sha256=
|
|
2
|
-
deepeval/_version.py,sha256=
|
|
1
|
+
deepeval/__init__.py,sha256=6fsb813LD_jNhqR-xZnSdE5E-KsBbC3tc4oIg5ZMgTw,2115
|
|
2
|
+
deepeval/_version.py,sha256=xgoMNdDXsY3c4GfV1_DVK-xGdMOp5KCDaKln5j0PJdY,27
|
|
3
3
|
deepeval/annotation/__init__.py,sha256=ZFhUVNNuH_YgQSZJ-m5E9iUb9TkAkEV33a6ouMDZ8EI,111
|
|
4
4
|
deepeval/annotation/annotation.py,sha256=3j3-syeJepAcEj3u3e4T_BeRDzNr7yXGDIoNQGMKpwQ,2298
|
|
5
5
|
deepeval/annotation/api.py,sha256=EYN33ACVzVxsFleRYm60KB4Exvff3rPJKt1VBuuX970,2147
|
|
@@ -77,7 +77,7 @@ deepeval/benchmarks/bool_q/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJ
|
|
|
77
77
|
deepeval/benchmarks/bool_q/bool_q.py,sha256=wJM4-wSybT8EwgDJVB4p3QYXGNzLD3tdrpGE1cNEz_E,5507
|
|
78
78
|
deepeval/benchmarks/bool_q/template.py,sha256=pgNj4RR6-4VJDDySwnKt-MpghBCjVlZ7fPKY6PltllQ,4055
|
|
79
79
|
deepeval/benchmarks/drop/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
80
|
-
deepeval/benchmarks/drop/drop.py,sha256=
|
|
80
|
+
deepeval/benchmarks/drop/drop.py,sha256=rGcqd79-IfQ2tvPuAL6wrON4R0hBiVGBy1OtDRmertE,12042
|
|
81
81
|
deepeval/benchmarks/drop/task.py,sha256=RV7DEXF192IOsY-yIVdlGb_y-A_sS5APPn8PGOPn5yU,17950
|
|
82
82
|
deepeval/benchmarks/drop/template.py,sha256=1P0mx_71Bxr9juIA8nGpVRIrP8NSoDILkIicjWvqE94,1376
|
|
83
83
|
deepeval/benchmarks/equity_med_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -88,7 +88,7 @@ deepeval/benchmarks/gsm8k/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJW
|
|
|
88
88
|
deepeval/benchmarks/gsm8k/gsm8k.py,sha256=LyJQBskKuqp013LLO3539RJiRXqCmlGl12BPXvQ8P88,6974
|
|
89
89
|
deepeval/benchmarks/gsm8k/template.py,sha256=3F7DwQwhJwKxtlbaO6TNvBBRaDEUBEp58JwirSjxtR0,1626
|
|
90
90
|
deepeval/benchmarks/hellaswag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
91
|
-
deepeval/benchmarks/hellaswag/hellaswag.py,sha256=
|
|
91
|
+
deepeval/benchmarks/hellaswag/hellaswag.py,sha256=_3felzBwQUhhRXk4D9NbcY8dme_qUQcwUjKGw9OtDJg,11972
|
|
92
92
|
deepeval/benchmarks/hellaswag/task.py,sha256=LfO8T6bpNiwdM8VdubKrup7qje3-rHgu69iB6Sdsc6I,7323
|
|
93
93
|
deepeval/benchmarks/hellaswag/template.py,sha256=TcCu25hkl89qbRwcEyRVGTGp7DU_5Eph754W2znk5QY,1279
|
|
94
94
|
deepeval/benchmarks/human_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -102,15 +102,15 @@ deepeval/benchmarks/lambada/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMp
|
|
|
102
102
|
deepeval/benchmarks/lambada/lambada.py,sha256=FExZLpDBgQfYe9o-MBS0LEy0-i4jHGeFHo8XCbMW_io,5556
|
|
103
103
|
deepeval/benchmarks/lambada/template.py,sha256=mSn0Elvp34wTnvaAm3IENz0mfGSNM_iRx50hIouk4t0,3776
|
|
104
104
|
deepeval/benchmarks/logi_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
105
|
-
deepeval/benchmarks/logi_qa/logi_qa.py,sha256=
|
|
105
|
+
deepeval/benchmarks/logi_qa/logi_qa.py,sha256=VNZGASigEDlJjzwGZtWG3OUs3v3P733GD84-h3TaxjU,11143
|
|
106
106
|
deepeval/benchmarks/logi_qa/task.py,sha256=pVMLVHPyDFSyoIsnckBNRDt8FK0J317PiGT-0dpr7rs,350
|
|
107
107
|
deepeval/benchmarks/logi_qa/template.py,sha256=EddGd2s3u2bPejogTcM50SDS7ynHnMhHaKuqQjjZoLk,4354
|
|
108
108
|
deepeval/benchmarks/math_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
109
|
-
deepeval/benchmarks/math_qa/math_qa.py,sha256=
|
|
109
|
+
deepeval/benchmarks/math_qa/math_qa.py,sha256=_eP-yocJom9r91qmAUBbIH4hrWazEHLV2lDKu0yMfEI,10651
|
|
110
110
|
deepeval/benchmarks/math_qa/task.py,sha256=3q_jlK5kIl5Zs0mQwuzxyvmPP6ncLZwszn7gtl1GfZs,192
|
|
111
111
|
deepeval/benchmarks/math_qa/template.py,sha256=pC3PB2GGU5TQ81I7E76RJh0xlu7xiF6d4SK3T_Nksh8,4468
|
|
112
112
|
deepeval/benchmarks/mmlu/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
113
|
-
deepeval/benchmarks/mmlu/mmlu.py,sha256=
|
|
113
|
+
deepeval/benchmarks/mmlu/mmlu.py,sha256=flg3tb052DVo7wnfAHkW9n07tEEhHrkT2C0d5-UMBoQ,11431
|
|
114
114
|
deepeval/benchmarks/mmlu/task.py,sha256=HnhnuD4Xjur9GlrBtswaR7ZPouGx4NTgbcFZu_oIzXw,2580
|
|
115
115
|
deepeval/benchmarks/mmlu/template.py,sha256=MsdcrZWVkyZpEw--Kj6W7vjOJgig-ABiz9B3WtZz1MQ,1303
|
|
116
116
|
deepeval/benchmarks/modes/__init__.py,sha256=IGhZp0-nmvVsZWBnTuBvKhdGiy4TJZShFSjYAeBZdbo,135
|
|
@@ -125,29 +125,33 @@ deepeval/benchmarks/truthful_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
|
|
|
125
125
|
deepeval/benchmarks/truthful_qa/mode.py,sha256=66aXCTXGTbAprA33M3TT15OhpJAqxLPDzJuShKxiFwY,84
|
|
126
126
|
deepeval/benchmarks/truthful_qa/task.py,sha256=PmfPbqINd9wizq8Tpk8pwms9TersoGlMGBqxpTmZhcc,1360
|
|
127
127
|
deepeval/benchmarks/truthful_qa/template.py,sha256=5y6mfJm9AXnQL7xwrfsZjH080GwO1kd_1GdTzDCoYgo,4465
|
|
128
|
-
deepeval/benchmarks/truthful_qa/truthful_qa.py,sha256=
|
|
128
|
+
deepeval/benchmarks/truthful_qa/truthful_qa.py,sha256=2r-xcFnzSSJds3ZGxYogBzjFFrCfJxYpXzKrpE8cC_c,13781
|
|
129
129
|
deepeval/benchmarks/utils.py,sha256=NHImqH22mv108_CKM7ajTpu4hOeUhr5xPicbf0i2qGk,287
|
|
130
130
|
deepeval/benchmarks/winogrande/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
131
131
|
deepeval/benchmarks/winogrande/template.py,sha256=tDwH8NpNF9x7FbDmQw45XaW1LNqGBV6zP5pwV1uOlwM,2089
|
|
132
132
|
deepeval/benchmarks/winogrande/winogrande.py,sha256=_4irJkRPw3c-Ufo-hM4cHpPKUoxozedFQpok9n0csTg,5644
|
|
133
133
|
deepeval/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
134
134
|
deepeval/cli/dotenv_handler.py,sha256=7PtVjCNUZKAXsVJQxznsLexad7y8x-gQ195xAxmv4gA,2468
|
|
135
|
-
deepeval/cli/main.py,sha256=
|
|
135
|
+
deepeval/cli/main.py,sha256=60FsOU9OGRX49eSSuJkePf8kArSor3-QbeqAZ2bDWCE,51298
|
|
136
136
|
deepeval/cli/server.py,sha256=cOm9xiYcPYB9GDeFQw9-Iawf9bNfOqftZs7q7mO_P7I,1979
|
|
137
137
|
deepeval/cli/test.py,sha256=kSIFMRTAfVzBJ4OitwvT829-ylV7UzPMP57P2DePS-Q,5482
|
|
138
138
|
deepeval/cli/types.py,sha256=_7KdthstHNc-JKCWrfpDQCf_j8h9PMxh0qJCHmVXJr0,310
|
|
139
139
|
deepeval/cli/utils.py,sha256=F4-yuONzk4ojDoSLjI9RYERB7HOD412iZ2lNlSCq4wk,5601
|
|
140
140
|
deepeval/confident/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
141
|
-
deepeval/confident/api.py,sha256
|
|
141
|
+
deepeval/confident/api.py,sha256=-2i3IBLtj5bUIImwOF6ltGVR3ZyViIbIC38XxwWvf54,8318
|
|
142
142
|
deepeval/confident/types.py,sha256=-slFhDof_1maMgpLxqDRZv6kz6ZVY2hP_0uj_aveJKU,533
|
|
143
|
+
deepeval/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
144
|
+
deepeval/config/settings.py,sha256=e7sk6_3I14hG457e75DoJd9Ojo3rOkpBZzsMYlj4gKQ,18139
|
|
145
|
+
deepeval/config/settings_manager.py,sha256=PsBS_5dRJASak2AUDwjhjLSiezNz1fje0R3onoFCKC0,4014
|
|
146
|
+
deepeval/config/utils.py,sha256=EG2MwGlUEb7bGuZ77jUo7QLoMeqkcK5gl7Wgk-pcS4g,2443
|
|
143
147
|
deepeval/constants.py,sha256=XNcTEdhtO4aDnQaHi815A_24iEE9o09Vxnthm7xlfnw,796
|
|
144
|
-
deepeval/dataset/__init__.py,sha256=
|
|
148
|
+
deepeval/dataset/__init__.py,sha256=rcum_VjBXu8eisCdr6sl84BgoZUs3x0tYbB2PnPtHGY,212
|
|
145
149
|
deepeval/dataset/api.py,sha256=ZxkEqAF4nZH_Ys_1f5r9N2LFI_vBcAJxt8eJm7Mplpw,831
|
|
146
|
-
deepeval/dataset/dataset.py,sha256=
|
|
150
|
+
deepeval/dataset/dataset.py,sha256=dkUDYtK1z9sDn6A-HOohoHHUobWuaCiuHEABLAfP4kQ,49396
|
|
147
151
|
deepeval/dataset/golden.py,sha256=T-rTk4Hw1tANx_Iimv977F6Y4QK3s5OIB4PecU5FJDM,2338
|
|
152
|
+
deepeval/dataset/test_run_tracer.py,sha256=5CdpDvhzkEEBRyqWi6egocaxiN6IRS3XfbACxEQZQeM,2544
|
|
148
153
|
deepeval/dataset/types.py,sha256=cwsAXeaxvd511Uc7-zQ0OxsrPPduCfUSKRbuziAwWhA,309
|
|
149
|
-
deepeval/dataset/utils.py,sha256=
|
|
150
|
-
deepeval/env.py,sha256=w6sM-_-qWzaZLUuIzXCvQ6iK7g4x8R4SEyi6fdcneFw,1018
|
|
154
|
+
deepeval/dataset/utils.py,sha256=kqzzM1kTtAmlt0kv4_6lLQ-oVR20PtcxdKE4NmoZFpM,5735
|
|
151
155
|
deepeval/errors.py,sha256=_K5wywEw2gp0DDECyeY6UrSI9GgOtCBbfPxt5maIzSY,113
|
|
152
156
|
deepeval/evaluate/__init__.py,sha256=315IaMiYEz7oJhZ4kPTBfeCNd1xF-wWVU6KOQnrKQpE,291
|
|
153
157
|
deepeval/evaluate/api.py,sha256=rkblH0ZFAAdyuF0Ymh7JE1pIJPR9yFuPrn9SQaCEQp4,435
|
|
@@ -175,15 +179,15 @@ deepeval/integrations/llama_index/__init__.py,sha256=zBwUFQXDp6QFtp1cfANy8ucV08r
|
|
|
175
179
|
deepeval/integrations/llama_index/agent/patched.py,sha256=4JbH0WQmt4lct7xxIH0phj8_Y-V35dgVv7DEDXK0jZI,2149
|
|
176
180
|
deepeval/integrations/llama_index/handler.py,sha256=eqI1n8E4MsvfKoFs5Zrm9IdCR7g9eBgNedISs7UkU_I,8947
|
|
177
181
|
deepeval/integrations/llama_index/utils.py,sha256=mxW71-3PjvBvJpLIU0kNWuTzCidy5l_-roLt8ZyWYA0,2599
|
|
178
|
-
deepeval/integrations/pydantic_ai/__init__.py,sha256=
|
|
179
|
-
deepeval/integrations/pydantic_ai/
|
|
180
|
-
deepeval/integrations/pydantic_ai/
|
|
181
|
-
deepeval/key_handler.py,sha256=
|
|
182
|
+
deepeval/integrations/pydantic_ai/__init__.py,sha256=36fBKBLRo1y5jFlj0Y4xhDJsiq4ZnqtmFO32R90Azo4,96
|
|
183
|
+
deepeval/integrations/pydantic_ai/otel.py,sha256=2DpO3RapdztXPlT9BWhQfF4dJDMyp2X7YvuplJ0SwC8,1661
|
|
184
|
+
deepeval/integrations/pydantic_ai/patcher.py,sha256=wszU2YROZAQovyz1ZNRvTtsuJ5By_x4SF6yjtmItcNk,12210
|
|
185
|
+
deepeval/key_handler.py,sha256=damdQEBLGy4IVk5DR5-E3blIZdLbcMtyeGAFn_4_SG4,6505
|
|
182
186
|
deepeval/metrics/__init__.py,sha256=xofaK_bJq0QCSerSWYjHYRXXch9YQwZHxIfVAv1G7fo,4012
|
|
183
187
|
deepeval/metrics/answer_relevancy/__init__.py,sha256=WbZUpoSg2GQoqJ4VIRirVVQ1JDx5xwT-RskwqNKfWGM,46
|
|
184
188
|
deepeval/metrics/answer_relevancy/answer_relevancy.py,sha256=vlc7BzUAtYVW62d5Qa-fIHSLOX239KFwCE7fCGP8jGE,10935
|
|
185
189
|
deepeval/metrics/answer_relevancy/schema.py,sha256=N8wIBh4qwk4-BZOEyPJM-MB2_0dbkqXHv0aCfsIkROo,405
|
|
186
|
-
deepeval/metrics/answer_relevancy/template.py,sha256=
|
|
190
|
+
deepeval/metrics/answer_relevancy/template.py,sha256=vU6yAsiCYtvx5S1g74WeEdJmuGvd2ZtwDDqM5-jfYkM,5174
|
|
187
191
|
deepeval/metrics/arena_g_eval/__init__.py,sha256=pVDIsWD_DLumOLegJrVSozcWwzsaxJXE5cIN7KxCzws,37
|
|
188
192
|
deepeval/metrics/arena_g_eval/arena_g_eval.py,sha256=B4Gjct3w5VGPxmumBblFVajdUIdWJTNR0hvMuhgIFg0,11661
|
|
189
193
|
deepeval/metrics/arena_g_eval/schema.py,sha256=3wipvUpZNO0O4QuWFy1LaLenfTYxLKldCERmP3sVtYI,288
|
|
@@ -232,7 +236,7 @@ deepeval/metrics/dag/utils.py,sha256=66D88fpjIUdVwZvYV8a1L9TlX1wvbCVuE6Y8BFTbpkE
|
|
|
232
236
|
deepeval/metrics/faithfulness/__init__.py,sha256=RffAtTOSdtWO1gHVMnPI-imJahf3JENOoJRiNw-Xv4g,43
|
|
233
237
|
deepeval/metrics/faithfulness/faithfulness.py,sha256=bYVhHI7Tr7xH0x-7F2LijxRuCCEtLOnXLzncvJLVv60,12887
|
|
234
238
|
deepeval/metrics/faithfulness/schema.py,sha256=2dU9dwwmqpGJcWvY2webERWIfH_tn02xgLghHkAY_eM,437
|
|
235
|
-
deepeval/metrics/faithfulness/template.py,sha256=
|
|
239
|
+
deepeval/metrics/faithfulness/template.py,sha256=q5NvVBcUEZgyMy_1zHFGtDNU7PoREFJGOkVQbZf8r-g,7117
|
|
236
240
|
deepeval/metrics/g_eval/__init__.py,sha256=HAhsQFVq9LIpZXPN00Jc_WrMXrh47NIT86VnUpWM4_4,102
|
|
237
241
|
deepeval/metrics/g_eval/g_eval.py,sha256=JI3rTaEClYgiL9oLaVFh7sunqGoXI7qBeBgi9RkSwDs,14327
|
|
238
242
|
deepeval/metrics/g_eval/schema.py,sha256=V629txuDrr_2IEKEsgJVYYZb_pkdfcltQV9ZjvxK5co,287
|
|
@@ -284,7 +288,7 @@ deepeval/metrics/multimodal_metrics/image_reference/template.py,sha256=GdBGZu6at
|
|
|
284
288
|
deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
285
289
|
deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py,sha256=KLvElO0wcsVZ3OSpRlfjI7xzZYj_9eFfK0tg4ssNmc0,12379
|
|
286
290
|
deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py,sha256=Tqo-qN94CV4ZB6n8I906elt-PL9aUk0ioeuu-KvIQc0,382
|
|
287
|
-
deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py,sha256=
|
|
291
|
+
deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py,sha256=gb9NSVwGp2xI34BvNdY7AccIcoRZKHdP0Ku7JPPHFzs,5439
|
|
288
292
|
deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
289
293
|
deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py,sha256=YUBm40GlvVsGM-4PNXrc4DtFSgvvYBl66yVf9iEVs9c,10656
|
|
290
294
|
deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py,sha256=Y0kOAfaaKCBmnvBPZXJLZK_C5FAWDyJC8fYbnNQx3eA,305
|
|
@@ -300,7 +304,7 @@ deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py,
|
|
|
300
304
|
deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
301
305
|
deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py,sha256=ZufEUHbgeUyDkCXWR37R-7piiF7xrWvgv6hROOigVIQ,13161
|
|
302
306
|
deepeval/metrics/multimodal_metrics/multimodal_faithfulness/schema.py,sha256=b-WtfA7zq4TgQiuqqNEMf7jmohnWBMW4opChHyg49Gc,414
|
|
303
|
-
deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py,sha256=
|
|
307
|
+
deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py,sha256=9EWRC-Wiyr_UEMPfpuTcX2tvsjPxSRY4n_lClcsK6vw,8389
|
|
304
308
|
deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
305
309
|
deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py,sha256=gZ7Q4vF12PLGhbHhOUAl9LIFWDOc9-GKhu3ly_LOkQ0,13997
|
|
306
310
|
deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py,sha256=H_9-iA1BXJwbPKrGEZBqxDO_En4sjXI8_xKSNYc-hnk,167
|
|
@@ -342,7 +346,7 @@ deepeval/metrics/task_completion/schema.py,sha256=JfnZkbCh7skWvrESy65GEo6Rvo0FDJ
|
|
|
342
346
|
deepeval/metrics/task_completion/task_completion.py,sha256=RKFkXCVOhO70I8A16zv5BCaV3QVKldNxawJ0T93U_Zc,8978
|
|
343
347
|
deepeval/metrics/task_completion/template.py,sha256=4xjTBcGrPQxInbf8iwJOZyok9SQex1aCkbxKmfkXoA4,10437
|
|
344
348
|
deepeval/metrics/tool_correctness/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
345
|
-
deepeval/metrics/tool_correctness/tool_correctness.py,sha256=
|
|
349
|
+
deepeval/metrics/tool_correctness/tool_correctness.py,sha256=8uyNFGM_aGFAB2aCv2CVDg5cjj0OJe8UVDqaT3Gp3kU,12090
|
|
346
350
|
deepeval/metrics/toxicity/__init__.py,sha256=1lgt8BKxfBDd7bfSLu_5kMzmsr9b2_ahPK9oq5zLkMk,39
|
|
347
351
|
deepeval/metrics/toxicity/schema.py,sha256=7uUdzXqTvIIz5nfahlllo_fzVRXg7UeMeXn7Hl32pKY,459
|
|
348
352
|
deepeval/metrics/toxicity/template.py,sha256=zl4y4Tg9gXkxKJ8aXVwj0cJ94pvfYuP7MTeV3dvB5yQ,5045
|
|
@@ -364,22 +368,23 @@ deepeval/models/embedding_models/ollama_embedding_model.py,sha256=kmJEVmo6nHdoDg
|
|
|
364
368
|
deepeval/models/embedding_models/openai_embedding_model.py,sha256=n1ER4gb4PGE7J7HICNoHAync9w9VaSl4tiU0d36T6Fs,2249
|
|
365
369
|
deepeval/models/hallucination_model.py,sha256=ABi978VKLE_jNHbDzM96kJ08EsZ5ZlvOlJHA_ptSkfQ,1003
|
|
366
370
|
deepeval/models/llms/__init__.py,sha256=qmvv7wnmTDvys2uUTwQRo-_3DlFV3fGLiewPeQYRsAI,670
|
|
367
|
-
deepeval/models/llms/amazon_bedrock_model.py,sha256=
|
|
371
|
+
deepeval/models/llms/amazon_bedrock_model.py,sha256=0AxdftPxYs86y2ILbRYhWRP38vP_yZl8E-ZrSLIlrOQ,5373
|
|
368
372
|
deepeval/models/llms/anthropic_model.py,sha256=T55-jKRbM3_B3Db9M3ruklm2cVVU1JDGAzicLTCBtgU,5151
|
|
369
373
|
deepeval/models/llms/azure_model.py,sha256=MG6sVGUgIy2RURwFWvRP7O_RF6QAg2dpqXIJhIsgY60,10994
|
|
370
374
|
deepeval/models/llms/deepseek_model.py,sha256=mz0U0uqazAVr8vv8SF74GRTr4ZEVc3Q1v9o5TVbmz_8,5440
|
|
371
375
|
deepeval/models/llms/gemini_model.py,sha256=QXf9mjopfWwJxpm0gbkXo6op_Wtu1GaIt1BfzS3OU8Q,8174
|
|
372
|
-
deepeval/models/llms/grok_model.py,sha256=
|
|
373
|
-
deepeval/models/llms/kimi_model.py,sha256=
|
|
376
|
+
deepeval/models/llms/grok_model.py,sha256=zPBmPnNCRGrtg_709gFv5A4iz7WilTmDpAyOpjXTa_M,5986
|
|
377
|
+
deepeval/models/llms/kimi_model.py,sha256=ZcvEwWgnv1dtmbq7LgMQJAjpkjxZr-l5eBi9KGqRbb0,6726
|
|
374
378
|
deepeval/models/llms/litellm_model.py,sha256=iu4-_JCpd9LdEa-eCWseD2iLTA-r7OSgYGWQ0IxB4eA,11527
|
|
375
379
|
deepeval/models/llms/local_model.py,sha256=PeF6ofMR8jBMTLzkCkgmkBJix9kHbWV5vTKGx8nehFs,3605
|
|
376
380
|
deepeval/models/llms/ollama_model.py,sha256=foL6sMza37Z0HH8qPStyIr1g-xEaD6Ce53L2C8Er-P8,3055
|
|
377
|
-
deepeval/models/llms/openai_model.py,sha256=
|
|
381
|
+
deepeval/models/llms/openai_model.py,sha256=84c_g6jegbQwncrqYYaodJqDgj1SIfoSy2p44ghZfR8,17231
|
|
378
382
|
deepeval/models/llms/utils.py,sha256=ZMZ02kjXAAleq0bIEyjj-gZwe6Gp0b0mK8YMuid2-20,722
|
|
379
383
|
deepeval/models/mlllms/__init__.py,sha256=19nN6kUB5XI0nUWUQX0aD9GBUMM8WWGvsDgKjuT4EF4,144
|
|
380
384
|
deepeval/models/mlllms/gemini_model.py,sha256=bWJe5x_ayHvxtZffKFcrFWkc4_W-hcLu8ErFjecoF_0,9593
|
|
381
385
|
deepeval/models/mlllms/ollama_model.py,sha256=3i79vflDPS1f4R105VMi26SwlZVkyQFCTJT4OJtU5q4,3823
|
|
382
386
|
deepeval/models/mlllms/openai_model.py,sha256=q1pe6UfQ39IUFrPSRYyM1h-kz4L5YNkqtzHQsNqAl9M,9786
|
|
387
|
+
deepeval/models/retry_policy.py,sha256=AQWACiUfs8ud-etXUMhd0NKxB6NmDlGvHmTcbT6SxS8,9072
|
|
383
388
|
deepeval/models/summac_model.py,sha256=wKeH7pWQRXrTlzlIw_r1YCb8b7jUhWq6jUz9FiNUCSg,1992
|
|
384
389
|
deepeval/models/unbias_model.py,sha256=umOMhQLTmnD7uOuhiQufEl4Wlti4q2s3EtKOpds7zhs,597
|
|
385
390
|
deepeval/models/utils.py,sha256=-3XDgg1U7PZ0jpLFiYXxqdBhp7idvlo7RPZv5SoD8lc,1130
|
|
@@ -388,7 +393,7 @@ deepeval/openai/extractors.py,sha256=q062nlYKuPVwqfLFYCD1yWv7xHF1U_XrYdAp5ve2l_E
|
|
|
388
393
|
deepeval/openai/patch.py,sha256=tPDqXaBScBJveM9P5xLT_mVwkubw0bOey-efvdjZIfg,7466
|
|
389
394
|
deepeval/openai/utils.py,sha256=-84VZGUsnzRkYAFWc_DGaGuQTDCUItk0VtUTdjtSxg4,2748
|
|
390
395
|
deepeval/openai_agents/__init__.py,sha256=u-e9laod3LyPfLcI5lr7Yhk8ArfWvlpr-D4_idWIt0A,321
|
|
391
|
-
deepeval/openai_agents/agent.py,sha256=
|
|
396
|
+
deepeval/openai_agents/agent.py,sha256=lgwc9pXhJn1xbytl2sy58aZX-gIsFP87fhImXnV9EJU,5564
|
|
392
397
|
deepeval/openai_agents/callback_handler.py,sha256=AXnL9teO8txUwK3cxBpqFTki5ZWOEy6kSglLjrnCNyA,3264
|
|
393
398
|
deepeval/openai_agents/extractors.py,sha256=0jZxwgY1NQ3mMxVWPpLcMpKlbj-aYV7rwuzRzG8hdZs,11529
|
|
394
399
|
deepeval/openai_agents/patch.py,sha256=zSmRV5yOReHC6IylhT93SM1nQpmH3sEWfYcJqa_iM84,3684
|
|
@@ -432,7 +437,7 @@ deepeval/test_case/mcp.py,sha256=Z625NLvz0E_UJpbyfyuAi_4nsqKH6DByBf0rfKd70xU,187
|
|
|
432
437
|
deepeval/test_case/mllm_test_case.py,sha256=8a0YoE72geX_fLI6yk_cObSxCPddwW-DOb-5OPE1-W8,5414
|
|
433
438
|
deepeval/test_case/utils.py,sha256=5lT7QmhItsQHt44-qQfspuktilcrEyvl2cS0cgUJxds,809
|
|
434
439
|
deepeval/test_run/__init__.py,sha256=3npHzm1Z96M2qFVtm22-RTfrVeQs27jKI-RmSWM2nh0,747
|
|
435
|
-
deepeval/test_run/api.py,sha256=
|
|
440
|
+
deepeval/test_run/api.py,sha256=ztXrcMsz0gDebd--IqQrw_gEEBlrkxnAk-WxpUFJQg0,5427
|
|
436
441
|
deepeval/test_run/cache.py,sha256=2bKXQ4GvHe2xowFGz4ekoheLno1Yv7O6PfuanFkWnA4,12786
|
|
437
442
|
deepeval/test_run/hooks.py,sha256=Qnd06bk9RJN4WmFUzJrBAi3Xj261hzyzI2iRmG8wbKw,375
|
|
438
443
|
deepeval/test_run/hyperparameters.py,sha256=f7M07w1EfT8YPtiD9xVIVYa3ZewkxewSkK7knwv0YlY,2289
|
|
@@ -446,16 +451,16 @@ deepeval/tracing/offline_evals/span.py,sha256=pXqTVXs-WnjRVpCYYEbNe0zSM6Wz9GsKHs
|
|
|
446
451
|
deepeval/tracing/offline_evals/thread.py,sha256=bcSGFcZJKnszArOLIlWvnCyt0zSmsd7Xsw5rl4RTVFg,1981
|
|
447
452
|
deepeval/tracing/offline_evals/trace.py,sha256=vTflaTKysKRiYvKA-Nx6PUJ3J6NrRLXiIdWieVcm90E,1868
|
|
448
453
|
deepeval/tracing/otel/__init__.py,sha256=HQsaF5yLPwyW5qg8AOV81_nG_7pFHnatOTHi9Wx3HEk,88
|
|
449
|
-
deepeval/tracing/otel/exporter.py,sha256=
|
|
450
|
-
deepeval/tracing/otel/utils.py,sha256=
|
|
454
|
+
deepeval/tracing/otel/exporter.py,sha256=dXQd834zm5rm1ss9pWkBBlk-JSdtiw7aFLso2hM53XY,26372
|
|
455
|
+
deepeval/tracing/otel/utils.py,sha256=g8yAzhqbPh1fOKCWkfNekC6AVotLfu1SUcfNMo6zii8,9786
|
|
451
456
|
deepeval/tracing/patchers.py,sha256=DAPNkhrDtoeyJIVeQDUMhTz-xGcXu00eqjQZmov8FiU,3096
|
|
452
|
-
deepeval/tracing/perf_epoch_bridge.py,sha256=
|
|
453
|
-
deepeval/tracing/tracing.py,sha256=
|
|
457
|
+
deepeval/tracing/perf_epoch_bridge.py,sha256=iyAPddB6Op7NpMtPHJ29lDm53Btz9yLaN6xSCfTRQm4,1825
|
|
458
|
+
deepeval/tracing/tracing.py,sha256=StvwFEG3MG67n7PBEyDDycdj0myMbP3LMB_FBhaZH-Y,38741
|
|
454
459
|
deepeval/tracing/types.py,sha256=3w5HEI6y4zuzVr8xGEEzDviLZCX_s_pK85qbwnyf1aY,5196
|
|
455
|
-
deepeval/tracing/utils.py,sha256
|
|
456
|
-
deepeval/utils.py,sha256=
|
|
457
|
-
deepeval-3.
|
|
458
|
-
deepeval-3.
|
|
459
|
-
deepeval-3.
|
|
460
|
-
deepeval-3.
|
|
461
|
-
deepeval-3.
|
|
460
|
+
deepeval/tracing/utils.py,sha256=eTEickbDvRiOu1twNolh4sHnjZF49vqdLgI74BudeTw,6357
|
|
461
|
+
deepeval/utils.py,sha256=EimWDwI1pKCE8vl6kuTnGbGT6ep9zHL5sZ0o-gj49XI,16857
|
|
462
|
+
deepeval-3.5.0.dist-info/LICENSE.md,sha256=0ATkuLv6QgsJTBODUHC5Rak_PArA6gv2t7inJzNTP38,11352
|
|
463
|
+
deepeval-3.5.0.dist-info/METADATA,sha256=KBAB5m11q4GAhVwCJBmXZDtaYtKoAO3sQ0vg-ajFRLg,18682
|
|
464
|
+
deepeval-3.5.0.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
|
|
465
|
+
deepeval-3.5.0.dist-info/entry_points.txt,sha256=fVr8UphXTfJe9I2rObmUtfU3gkSrYeM0pLy-NbJYg10,94
|
|
466
|
+
deepeval-3.5.0.dist-info/RECORD,,
|
deepeval/env.py
DELETED
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
import os
|
|
3
|
-
|
|
4
|
-
try:
|
|
5
|
-
from dotenv import load_dotenv, find_dotenv # type: ignore
|
|
6
|
-
except Exception:
|
|
7
|
-
load_dotenv = None
|
|
8
|
-
find_dotenv = None
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def autoload_dotenv() -> None:
|
|
12
|
-
"""
|
|
13
|
-
Autoload environment variables for DeepEval at import time.
|
|
14
|
-
|
|
15
|
-
Precedence from highest -> lowest:
|
|
16
|
-
1) Existing process environment variables
|
|
17
|
-
2) .env.local (from current working directory)
|
|
18
|
-
3) .env (from current working directory)
|
|
19
|
-
|
|
20
|
-
Behavior:
|
|
21
|
-
- Loads .env.local then .env if present, without overriding existing vars.
|
|
22
|
-
- Opt-out by setting DEEPEVAL_DISABLE_DOTENV=1.
|
|
23
|
-
- Soft-fails cleanly if python-dotenv is not installed.
|
|
24
|
-
"""
|
|
25
|
-
if os.getenv("DEEPEVAL_DISABLE_DOTENV") == "1":
|
|
26
|
-
return
|
|
27
|
-
|
|
28
|
-
if not (load_dotenv and find_dotenv):
|
|
29
|
-
return
|
|
30
|
-
|
|
31
|
-
for name in (".env.local", ".env"):
|
|
32
|
-
path = find_dotenv(name, usecwd=True)
|
|
33
|
-
if path:
|
|
34
|
-
# Don't override previously set values
|
|
35
|
-
load_dotenv(path, override=False)
|
|
@@ -1,364 +0,0 @@
|
|
|
1
|
-
from deepeval.telemetry import capture_tracing_integration
|
|
2
|
-
from deepeval.metrics import BaseMetric
|
|
3
|
-
from typing import List, Optional
|
|
4
|
-
import functools
|
|
5
|
-
import inspect
|
|
6
|
-
import json
|
|
7
|
-
from deepeval.test_case import LLMTestCase
|
|
8
|
-
from deepeval.tracing.types import TestCaseMetricPair
|
|
9
|
-
from deepeval.tracing.tracing import trace_manager
|
|
10
|
-
from deepeval.tracing.otel.utils import parse_string, parse_list_of_strings
|
|
11
|
-
from opentelemetry import trace
|
|
12
|
-
|
|
13
|
-
try:
|
|
14
|
-
from opentelemetry.trace import NoOpTracer
|
|
15
|
-
|
|
16
|
-
opentelemetry_installed = True
|
|
17
|
-
except:
|
|
18
|
-
opentelemetry_installed = False
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
def is_opentelemetry_available():
|
|
22
|
-
if not opentelemetry_installed:
|
|
23
|
-
raise ImportError(
|
|
24
|
-
"OpenTelemetry SDK is not available. Please install it with `pip install opentelemetry-sdk`."
|
|
25
|
-
)
|
|
26
|
-
return True
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
try:
|
|
30
|
-
from pydantic_ai.agent import Agent
|
|
31
|
-
from pydantic_ai.models.instrumented import InstrumentedModel
|
|
32
|
-
|
|
33
|
-
pydantic_ai_installed = True
|
|
34
|
-
except:
|
|
35
|
-
pydantic_ai_installed = False
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def is_pydantic_ai_installed():
|
|
39
|
-
if not pydantic_ai_installed:
|
|
40
|
-
raise ImportError(
|
|
41
|
-
"Pydantic AI is not installed. Please install it with `pip install pydantic-ai`."
|
|
42
|
-
)
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
class PydanticAIAgent(Agent):
|
|
46
|
-
def __init__(self, *args, **kwargs):
|
|
47
|
-
with capture_tracing_integration("pydantic_ai.agent.PydanticAIAgent"):
|
|
48
|
-
is_pydantic_ai_installed()
|
|
49
|
-
is_opentelemetry_available()
|
|
50
|
-
|
|
51
|
-
super().__init__(*args, **kwargs)
|
|
52
|
-
|
|
53
|
-
# attributes to be set if ran synchronously
|
|
54
|
-
self.metric_collection: str = None
|
|
55
|
-
self.metrics: list[BaseMetric] = None
|
|
56
|
-
|
|
57
|
-
# trace attributes to be set if ran synchronously
|
|
58
|
-
self._trace_name: str = None
|
|
59
|
-
self._trace_tags: list[str] = None
|
|
60
|
-
self._trace_metadata: dict = None
|
|
61
|
-
self._trace_thread_id: str = None
|
|
62
|
-
self._trace_user_id: str = None
|
|
63
|
-
|
|
64
|
-
# Patch the run method only for this instance
|
|
65
|
-
self._patch_run_method()
|
|
66
|
-
self._patch_run_method_sync()
|
|
67
|
-
self._patch_tool_decorator()
|
|
68
|
-
|
|
69
|
-
def _patch_tool_decorator(self):
|
|
70
|
-
"""Patch the tool decorator to print input and output"""
|
|
71
|
-
original_tool = self.tool
|
|
72
|
-
|
|
73
|
-
@functools.wraps(original_tool)
|
|
74
|
-
def patched_tool(
|
|
75
|
-
*args,
|
|
76
|
-
metric_collection: Optional[str] = None,
|
|
77
|
-
metrics: Optional[List[BaseMetric]] = None,
|
|
78
|
-
**kwargs
|
|
79
|
-
):
|
|
80
|
-
|
|
81
|
-
# Check if function is in args (direct decoration: @agent.tool)
|
|
82
|
-
if args and callable(args[0]):
|
|
83
|
-
original_func = args[0]
|
|
84
|
-
patched_func = self._create_patched_function(
|
|
85
|
-
original_func, metric_collection, metrics
|
|
86
|
-
)
|
|
87
|
-
new_args = (patched_func,) + args[1:]
|
|
88
|
-
result = original_tool(*new_args, **kwargs)
|
|
89
|
-
return result
|
|
90
|
-
else:
|
|
91
|
-
# Decorator called with parameters: @agent.tool(metric_collection="...")
|
|
92
|
-
# Return a decorator that will receive the function
|
|
93
|
-
def decorator_with_params(func):
|
|
94
|
-
patched_func = self._create_patched_function(
|
|
95
|
-
func, metric_collection, metrics
|
|
96
|
-
)
|
|
97
|
-
return original_tool(patched_func, **kwargs)
|
|
98
|
-
|
|
99
|
-
return decorator_with_params
|
|
100
|
-
|
|
101
|
-
# Replace the tool method for this instance
|
|
102
|
-
self.tool = patched_tool
|
|
103
|
-
|
|
104
|
-
def _create_patched_function(
|
|
105
|
-
self, original_func, metric_collection, metrics
|
|
106
|
-
):
|
|
107
|
-
"""Create a patched version of the function that adds tracing"""
|
|
108
|
-
if inspect.iscoroutinefunction(original_func):
|
|
109
|
-
|
|
110
|
-
@functools.wraps(original_func)
|
|
111
|
-
async def patched_async_func(*func_args, **func_kwargs):
|
|
112
|
-
result = await original_func(*func_args, **func_kwargs)
|
|
113
|
-
|
|
114
|
-
current_span = trace.get_current_span()
|
|
115
|
-
if current_span.is_recording():
|
|
116
|
-
try:
|
|
117
|
-
result_str = str(result)
|
|
118
|
-
except Exception:
|
|
119
|
-
result_str = ""
|
|
120
|
-
current_span.set_attribute(
|
|
121
|
-
"confident.span.output", result_str
|
|
122
|
-
)
|
|
123
|
-
if metric_collection:
|
|
124
|
-
current_span.set_attribute(
|
|
125
|
-
"confident.span.metric_collection",
|
|
126
|
-
metric_collection,
|
|
127
|
-
)
|
|
128
|
-
# TODO: add metrics in component level evals
|
|
129
|
-
return result
|
|
130
|
-
|
|
131
|
-
return patched_async_func
|
|
132
|
-
else:
|
|
133
|
-
|
|
134
|
-
@functools.wraps(original_func)
|
|
135
|
-
def patched_sync_func(*func_args, **func_kwargs):
|
|
136
|
-
result = original_func(*func_args, **func_kwargs)
|
|
137
|
-
|
|
138
|
-
current_span = trace.get_current_span()
|
|
139
|
-
if current_span.is_recording():
|
|
140
|
-
try:
|
|
141
|
-
result_str = str(result)
|
|
142
|
-
except Exception:
|
|
143
|
-
result_str = ""
|
|
144
|
-
current_span.set_attribute(
|
|
145
|
-
"confident.span.output", result_str
|
|
146
|
-
)
|
|
147
|
-
if metric_collection:
|
|
148
|
-
current_span.set_attribute(
|
|
149
|
-
"confident.span.metric_collection",
|
|
150
|
-
metric_collection,
|
|
151
|
-
)
|
|
152
|
-
# TODO: add metrics in component level evals
|
|
153
|
-
return result
|
|
154
|
-
|
|
155
|
-
return patched_sync_func
|
|
156
|
-
|
|
157
|
-
def _patch_run_method(self):
    """Replace this instance's ``run`` with a coroutine that records an "agent" span.

    The wrapper accepts these extra keyword-only arguments and does not
    forward them to the original ``run``:

    * ``metric_collection`` / ``metrics`` -- evaluation settings for this call.
    * ``trace_name``, ``trace_tags``, ``trace_metadata``,
      ``trace_thread_id``, ``trace_user_id`` -- trace-level attributes.

    For every one of them the value given for the call takes precedence;
    when it is empty the value stored on the instance (``self.metric_collection``,
    ``self.metrics``, ``self._trace_*``) is used instead. The instance values
    are what the patched ``run_sync`` stores before delegating.

    The wrapper raises ``TypeError`` when ``metrics`` is not a list of
    ``BaseMetric`` instances or when ``trace_metadata`` is not a dict.
    """
    original_run = self.run

    @functools.wraps(original_run)
    async def patched_run(
        *args,
        metric_collection=None,
        metrics=None,
        trace_name=None,
        trace_tags=None,
        trace_metadata=None,
        trace_thread_id=None,
        trace_user_id=None,
        **kwargs
    ):
        # Normalise the flattened arguments. parse_string and
        # parse_list_of_strings are defined elsewhere -- presumably they
        # validate the type and return the value or None (TODO confirm).
        metric_collection = parse_string(metric_collection)
        trace_name = parse_string(trace_name)
        trace_tags = parse_list_of_strings(trace_tags)
        trace_thread_id = parse_string(trace_thread_id)
        trace_user_id = parse_string(trace_user_id)

        if metrics is not None and not (
            isinstance(metrics, list)
            and all(isinstance(m, BaseMetric) for m in metrics)
        ):
            raise TypeError("metrics must be a list of BaseMetric instances")

        if trace_metadata is not None and not isinstance(trace_metadata, dict):
            raise TypeError("trace_metadata must be a dictionary")

        # Resolve the agent name and model before the run so the tracer can
        # be taken from the model's instrumentation settings.
        if kwargs.get("infer_name", True) and self.name is None:
            self._infer_name(inspect.currentframe())
        model_used = self._get_model(kwargs.get("model"))

        if isinstance(model_used, InstrumentedModel):
            tracer = model_used.instrumentation_settings.tracer
        else:
            tracer = NoOpTracer()

        with tracer.start_as_current_span("agent") as run_span:
            result = await original_run(*args, **kwargs)

            agent_name = str(self.name) if self.name else "agent"

            # The prompt may be positional, passed as ``user_prompt=...``, or
            # omitted entirely; indexing args[0] blindly raised IndexError
            # after the run had already completed.
            prompt = args[0] if args else kwargs.get("user_prompt")
            if isinstance(prompt, str) or (
                isinstance(prompt, list)
                and all(isinstance(part, str) for part in prompt)
            ):
                span_input = prompt
            else:
                span_input = ""

            # Best effort: an output that cannot be stringified is recorded
            # as an empty string rather than failing the caller's run.
            try:
                span_output = str(result.output)
            except Exception:
                span_output = ""

            # Agent span attributes.
            run_span.set_attribute("confident.span.type", "agent")
            run_span.set_attribute("confident.agent.name", agent_name)
            run_span.set_attribute("confident.agent.input", span_input)
            run_span.set_attribute("confident.agent.output", span_output)

            # Generic span input/output, kept as a fallback.
            run_span.set_attribute("confident.span.input", span_input)
            run_span.set_attribute("confident.span.output", span_output)

            # Per-call value first, then the value stored on the instance.
            active_collection = metric_collection or self.metric_collection
            if active_collection:
                run_span.set_attribute(
                    "confident.span.metric_collection", active_collection
                )

            # Same precedence for trace attributes. Previously the instance
            # values were written last and overrode the per-call ones.
            active_metadata = trace_metadata or self._trace_metadata
            trace_attributes = (
                ("confident.trace.name", trace_name or self._trace_name),
                ("confident.trace.tags", trace_tags or self._trace_tags),
                (
                    "confident.trace.metadata",
                    json.dumps(active_metadata) if active_metadata else None,
                ),
                (
                    "confident.trace.thread_id",
                    trace_thread_id or self._trace_thread_id,
                ),
                (
                    "confident.trace.user_id",
                    trace_user_id or self._trace_user_id,
                ),
            )
            for attribute_key, attribute_value in trace_attributes:
                if attribute_value:
                    run_span.set_attribute(attribute_key, attribute_value)

            active_metrics = metrics or self.metrics
            if active_metrics:
                trace_manager.test_case_metrics.append(
                    TestCaseMetricPair(
                        test_case=LLMTestCase(
                            input=span_input, actual_output=span_output
                        ),
                        metrics=active_metrics,
                    )
                )

        return result

    # Replace the method only for this instance
    self.run = patched_run
|
|
311
|
-
|
|
312
|
-
def _patch_run_method_sync(self):
    """Replace this instance's ``run_sync`` with a wrapper that stores tracing options.

    The wrapper accepts ``metric_collection``, ``metrics``, ``trace_name``,
    ``trace_tags``, ``trace_metadata``, ``trace_thread_id`` and
    ``trace_user_id`` as keyword-only arguments, stores them on the
    instance, and then calls the original ``run_sync`` without them.

    ``metric_collection`` and ``metrics`` are stored only when truthy, so an
    earlier value is kept otherwise. The ``_trace_*`` attributes are
    overwritten on every call, including with ``None``.

    The wrapper raises ``TypeError`` when ``metrics`` is not a list of
    ``BaseMetric`` instances or when ``trace_metadata`` is not a dict.
    """
    wrapped_run_sync = self.run_sync

    @functools.wraps(wrapped_run_sync)
    def run_sync_with_tracing(
        *args,
        metric_collection=None,
        metrics=None,
        trace_name=None,
        trace_tags=None,
        trace_metadata=None,
        trace_thread_id=None,
        trace_user_id=None,
        **kwargs
    ):
        # Normalise the string-like options first (helpers defined elsewhere).
        metric_collection = parse_string(metric_collection)
        trace_name = parse_string(trace_name)
        trace_tags = parse_list_of_strings(trace_tags)
        trace_thread_id = parse_string(trace_thread_id)
        trace_user_id = parse_string(trace_user_id)

        if metrics is not None:
            is_metric_list = isinstance(metrics, list) and all(
                isinstance(item, BaseMetric) for item in metrics
            )
            if not is_metric_list:
                raise TypeError(
                    "metrics must be a list of BaseMetric instances"
                )

        if not (trace_metadata is None or isinstance(trace_metadata, dict)):
            raise TypeError("trace_metadata must be a dictionary")

        # Evaluation settings are replaced only when a new value is given.
        if metric_collection:
            self.metric_collection = metric_collection
        if metrics:
            self.metrics = metrics

        # Trace settings are always overwritten, so None clears a prior value.
        # NOTE(review): presumably read back by the patched run() -- confirm.
        self._trace_name = trace_name
        self._trace_tags = trace_tags
        self._trace_metadata = trace_metadata
        self._trace_thread_id = trace_thread_id
        self._trace_user_id = trace_user_id

        return wrapped_run_sync(*args, **kwargs)

    # Rebind on the instance only; the class attribute is left untouched.
    self.run_sync = run_sync_with_tracing
|
|
File without changes
|
|
File without changes
|
|
File without changes
|