deepeval 3.5.8__tar.gz → 3.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deepeval-3.5.8 → deepeval-3.6.0}/PKG-INFO +1 -1
- deepeval-3.6.0/deepeval/_version.py +1 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/config/settings_manager.py +1 -1
- deepeval-3.6.0/deepeval/contextvars.py +25 -0
- deepeval-3.6.0/deepeval/dataset/__init__.py +11 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/evaluate/execute.py +15 -3
- deepeval-3.6.0/deepeval/integrations/pydantic_ai/__init__.py +5 -0
- deepeval-3.6.0/deepeval/integrations/pydantic_ai/agent.py +21 -0
- deepeval-3.6.0/deepeval/integrations/pydantic_ai/instrumentator.py +196 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/integrations/pydantic_ai/otel.py +8 -2
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/openai_agents/__init__.py +4 -3
- deepeval-3.6.0/deepeval/openai_agents/agent.py +36 -0
- deepeval-3.6.0/deepeval/openai_agents/callback_handler.py +135 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/openai_agents/extractors.py +83 -7
- deepeval-3.6.0/deepeval/openai_agents/patch.py +309 -0
- deepeval-3.6.0/deepeval/openai_agents/runner.py +348 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/tracing/context.py +1 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/tracing/otel/exporter.py +236 -174
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/tracing/otel/utils.py +95 -7
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/tracing/tracing.py +3 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/utils.py +4 -3
- {deepeval-3.5.8 → deepeval-3.6.0}/pyproject.toml +1 -1
- deepeval-3.5.8/deepeval/_version.py +0 -1
- deepeval-3.5.8/deepeval/dataset/__init__.py +0 -5
- deepeval-3.5.8/deepeval/integrations/pydantic_ai/__init__.py +0 -5
- deepeval-3.5.8/deepeval/integrations/pydantic_ai/agent.py +0 -339
- deepeval-3.5.8/deepeval/integrations/pydantic_ai/patcher.py +0 -484
- deepeval-3.5.8/deepeval/integrations/pydantic_ai/utils.py +0 -323
- deepeval-3.5.8/deepeval/openai_agents/agent.py +0 -194
- deepeval-3.5.8/deepeval/openai_agents/callback_handler.py +0 -134
- deepeval-3.5.8/deepeval/openai_agents/patch.py +0 -115
- deepeval-3.5.8/deepeval/openai_agents/runner.py +0 -335
- {deepeval-3.5.8 → deepeval-3.6.0}/LICENSE.md +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/README.md +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/annotation/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/annotation/annotation.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/annotation/api.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/arc/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/arc/arc.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/arc/mode.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/arc/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/base_benchmark.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/bbq/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/bbq/bbq.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/bbq/task.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/bbq/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/big_bench_hard.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/boolean_expressions.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/causal_judgement.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/date_understanding.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/disambiguation_qa.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/dyck_languages.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/formal_fallacies.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/geometric_shapes.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/hyperbaton.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_five_objects.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_seven_objects.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_three_objects.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/movie_recommendation.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/multistep_arithmetic_two.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/navigate.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/object_counting.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/penguins_in_a_table.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/reasoning_about_colored_objects.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/ruin_names.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/salient_translation_error_detection.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/snarks.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/sports_understanding.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/temporal_sequences.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/web_of_lies.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/word_sorting.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/boolean_expressions.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/causal_judgement.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/date_understanding.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/disambiguation_qa.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/dyck_languages.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/formal_fallacies.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/geometric_shapes.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/hyperbaton.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_five_objects.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_seven_objects.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_three_objects.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/movie_recommendation.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/multistep_arithmetic_two.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/navigate.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/object_counting.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/penguins_in_a_table.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/reasoning_about_colored_objects.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/ruin_names.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/salient_translation_error_detection.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/snarks.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/sports_understanding.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/temporal_sequences.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/web_of_lies.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/word_sorting.txt +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/task.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/bool_q/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/bool_q/bool_q.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/bool_q/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/drop/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/drop/drop.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/drop/task.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/drop/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/equity_med_qa/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/equity_med_qa/equity_med_qa.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/equity_med_qa/task.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/equity_med_qa/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/gsm8k/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/gsm8k/gsm8k.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/gsm8k/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/hellaswag/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/hellaswag/hellaswag.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/hellaswag/task.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/hellaswag/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/human_eval/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/human_eval/human_eval.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/human_eval/task.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/human_eval/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/ifeval/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/ifeval/ifeval.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/ifeval/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/lambada/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/lambada/lambada.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/lambada/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/logi_qa/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/logi_qa/logi_qa.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/logi_qa/task.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/logi_qa/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/math_qa/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/math_qa/math_qa.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/math_qa/task.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/math_qa/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/mmlu/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/mmlu/mmlu.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/mmlu/task.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/mmlu/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/modes/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/results.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/squad/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/squad/squad.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/squad/task.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/squad/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/tasks/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/truthful_qa/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/truthful_qa/mode.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/truthful_qa/task.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/truthful_qa/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/truthful_qa/truthful_qa.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/utils.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/winogrande/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/winogrande/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/benchmarks/winogrande/winogrande.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/cli/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/cli/dotenv_handler.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/cli/main.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/cli/server.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/cli/test.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/cli/types.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/cli/utils.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/confident/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/confident/api.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/confident/types.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/config/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/config/settings.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/config/utils.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/constants.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/dataset/api.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/dataset/dataset.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/dataset/golden.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/dataset/test_run_tracer.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/dataset/types.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/dataset/utils.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/errors.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/evaluate/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/evaluate/api.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/evaluate/compare.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/evaluate/configs.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/evaluate/evaluate.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/evaluate/types.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/evaluate/utils.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/integrations/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/integrations/crewai/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/integrations/crewai/agent.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/integrations/crewai/handler.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/integrations/crewai/patch.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/integrations/hugging_face/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/integrations/hugging_face/callback.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/integrations/hugging_face/rich_manager.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/integrations/hugging_face/tests/test_callbacks.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/integrations/hugging_face/utils.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/integrations/langchain/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/integrations/langchain/callback.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/integrations/langchain/patch.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/integrations/langchain/utils.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/integrations/llama_index/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/integrations/llama_index/agent/patched.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/integrations/llama_index/handler.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/integrations/llama_index/utils.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/key_handler.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/answer_relevancy/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/answer_relevancy/answer_relevancy.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/answer_relevancy/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/answer_relevancy/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/arena_g_eval/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/arena_g_eval/arena_g_eval.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/arena_g_eval/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/arena_g_eval/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/arena_g_eval/utils.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/argument_correctness/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/argument_correctness/argument_correctness.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/argument_correctness/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/argument_correctness/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/base_metric.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/bias/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/bias/bias.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/bias/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/bias/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/contextual_precision/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/contextual_precision/contextual_precision.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/contextual_precision/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/contextual_precision/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/contextual_recall/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/contextual_recall/contextual_recall.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/contextual_recall/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/contextual_recall/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/contextual_relevancy/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/contextual_relevancy/contextual_relevancy.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/contextual_relevancy/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/contextual_relevancy/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/conversation_completeness/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/conversation_completeness/conversation_completeness.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/conversation_completeness/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/conversation_completeness/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/conversational_dag/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/conversational_dag/conversational_dag.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/conversational_dag/nodes.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/conversational_dag/templates.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/conversational_g_eval/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/conversational_g_eval/conversational_g_eval.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/conversational_g_eval/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/conversational_g_eval/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/dag/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/dag/dag.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/dag/graph.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/dag/nodes.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/dag/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/dag/templates.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/dag/utils.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/faithfulness/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/faithfulness/faithfulness.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/faithfulness/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/faithfulness/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/g_eval/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/g_eval/g_eval.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/g_eval/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/g_eval/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/g_eval/utils.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/hallucination/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/hallucination/hallucination.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/hallucination/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/hallucination/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/indicator.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/json_correctness/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/json_correctness/json_correctness.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/json_correctness/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/json_correctness/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/knowledge_retention/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/knowledge_retention/knowledge_retention.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/knowledge_retention/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/knowledge_retention/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/mcp/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/mcp/mcp_task_completion.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/mcp/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/mcp/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/mcp_use_metric/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/mcp_use_metric/mcp_use_metric.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/mcp_use_metric/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/mcp_use_metric/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/misuse/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/misuse/misuse.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/misuse/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/misuse/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_coherence/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_coherence/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_coherence/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_editing/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_editing/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_editing/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_helpfulness/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_helpfulness/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_helpfulness/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_reference/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_reference/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_reference/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/text_to_image/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/text_to_image/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/text_to_image/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/non_advice/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/non_advice/non_advice.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/non_advice/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/non_advice/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/pii_leakage/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/pii_leakage/pii_leakage.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/pii_leakage/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/pii_leakage/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/prompt_alignment/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/prompt_alignment/prompt_alignment.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/prompt_alignment/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/prompt_alignment/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/ragas.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/role_adherence/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/role_adherence/role_adherence.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/role_adherence/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/role_adherence/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/role_violation/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/role_violation/role_violation.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/role_violation/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/role_violation/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/summarization/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/summarization/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/summarization/summarization.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/summarization/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/task_completion/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/task_completion/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/task_completion/task_completion.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/task_completion/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/tool_correctness/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/tool_correctness/tool_correctness.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/toxicity/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/toxicity/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/toxicity/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/toxicity/toxicity.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/turn_relevancy/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/turn_relevancy/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/turn_relevancy/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/turn_relevancy/turn_relevancy.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/metrics/utils.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/_summac_model.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/answer_relevancy_model.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/base_model.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/detoxify_model.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/embedding_models/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/embedding_models/azure_embedding_model.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/embedding_models/local_embedding_model.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/embedding_models/ollama_embedding_model.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/embedding_models/openai_embedding_model.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/hallucination_model.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/llms/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/llms/amazon_bedrock_model.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/llms/anthropic_model.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/llms/azure_model.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/llms/deepseek_model.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/llms/gemini_model.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/llms/grok_model.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/llms/kimi_model.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/llms/litellm_model.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/llms/local_model.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/llms/ollama_model.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/llms/openai_model.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/llms/utils.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/mlllms/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/mlllms/gemini_model.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/mlllms/ollama_model.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/mlllms/openai_model.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/retry_policy.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/summac_model.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/unbias_model.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/models/utils.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/openai/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/openai/extractors.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/openai/patch.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/openai/utils.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/plugins/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/plugins/plugin.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/progress_context.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/prompt/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/prompt/api.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/prompt/prompt.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/prompt/utils.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/py.typed +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/red_teaming/README.md +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/scorer/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/scorer/scorer.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/simulator/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/simulator/conversation_simulator.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/simulator/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/simulator/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/singleton.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/synthesizer/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/synthesizer/base_synthesizer.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/synthesizer/chunking/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/synthesizer/chunking/context_generator.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/synthesizer/chunking/doc_chunker.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/synthesizer/config.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/synthesizer/schema.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/synthesizer/synthesizer.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/synthesizer/templates/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/synthesizer/templates/template.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/synthesizer/templates/template_extraction.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/synthesizer/templates/template_prompt.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/synthesizer/types.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/synthesizer/utils.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/telemetry.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/test_case/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/test_case/arena_test_case.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/test_case/conversational_test_case.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/test_case/llm_test_case.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/test_case/mcp.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/test_case/mllm_test_case.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/test_case/utils.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/test_run/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/test_run/api.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/test_run/cache.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/test_run/hooks.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/test_run/hyperparameters.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/test_run/test_run.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/tracing/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/tracing/api.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/tracing/offline_evals/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/tracing/offline_evals/api.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/tracing/offline_evals/span.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/tracing/offline_evals/thread.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/tracing/offline_evals/trace.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/tracing/otel/__init__.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/tracing/patchers.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/tracing/perf_epoch_bridge.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/tracing/types.py +0 -0
- {deepeval-3.5.8 → deepeval-3.6.0}/deepeval/tracing/utils.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__: str = "3.6.0"
|
|
@@ -15,7 +15,7 @@ from enum import Enum
|
|
|
15
15
|
from pydantic import SecretStr
|
|
16
16
|
from deepeval.config.settings import get_settings, _SAVE_RE
|
|
17
17
|
from deepeval.cli.dotenv_handler import DotenvHandler
|
|
18
|
-
from deepeval.utils import bool_to_env_str
|
|
18
|
+
from deepeval.config.utils import bool_to_env_str
|
|
19
19
|
|
|
20
20
|
logger = logging.getLogger(__name__)
|
|
21
21
|
StrOrEnum = Union[str, Enum]
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from contextvars import ContextVar
|
|
4
|
+
from typing import TYPE_CHECKING, Optional
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from deepeval.dataset.golden import Golden
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
CURRENT_GOLDEN: ContextVar[Optional[Golden]] = ContextVar(
|
|
12
|
+
"CURRENT_GOLDEN", default=None
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def set_current_golden(golden: Optional[Golden]):
    """Bind ``golden`` to the ``CURRENT_GOLDEN`` context variable.

    Returns:
        The token produced by ``ContextVar.set``; pass it to
        ``reset_current_golden`` to restore the previous value.
    """
    token = CURRENT_GOLDEN.set(golden)
    return token
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_current_golden() -> Optional[Golden]:
    """Return the golden bound to the current context.

    Yields ``None`` (the variable's declared default) when nothing is set.
    """
    current = CURRENT_GOLDEN.get()
    return current
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def reset_current_golden(token) -> None:
    """Restore ``CURRENT_GOLDEN`` to the value it held before ``token`` was issued.

    Args:
        token: The token returned by the ``ContextVar.set`` call being undone.
    """
    CURRENT_GOLDEN.reset(token)
    return None
|
|
@@ -42,6 +42,7 @@ from deepeval.tracing.api import (
|
|
|
42
42
|
BaseApiSpan,
|
|
43
43
|
)
|
|
44
44
|
from deepeval.dataset import Golden
|
|
45
|
+
from deepeval.contextvars import set_current_golden, reset_current_golden
|
|
45
46
|
from deepeval.errors import MissingTestCaseParamsError
|
|
46
47
|
from deepeval.metrics.utils import copy_metrics
|
|
47
48
|
from deepeval.utils import (
|
|
@@ -1480,6 +1481,7 @@ def execute_agentic_test_cases_from_loop(
|
|
|
1480
1481
|
)
|
|
1481
1482
|
|
|
1482
1483
|
for golden in goldens:
|
|
1484
|
+
token = set_current_golden(golden)
|
|
1483
1485
|
with capture_evaluation_run("golden"):
|
|
1484
1486
|
# yield golden
|
|
1485
1487
|
count += 1
|
|
@@ -1492,8 +1494,14 @@ def execute_agentic_test_cases_from_loop(
|
|
|
1492
1494
|
_progress=progress,
|
|
1493
1495
|
_pbar_callback_id=pbar_tags_id,
|
|
1494
1496
|
):
|
|
1495
|
-
|
|
1496
|
-
|
|
1497
|
+
try:
|
|
1498
|
+
# yield golden to user code
|
|
1499
|
+
yield golden
|
|
1500
|
+
# control has returned from user code without error, capture trace now
|
|
1501
|
+
current_trace: Trace = current_trace_context.get()
|
|
1502
|
+
finally:
|
|
1503
|
+
# after user code returns control, always reset the context
|
|
1504
|
+
reset_current_golden(token)
|
|
1497
1505
|
|
|
1498
1506
|
update_pbar(progress, pbar_tags_id)
|
|
1499
1507
|
update_pbar(progress, pbar_id)
|
|
@@ -1849,6 +1857,7 @@ def a_execute_agentic_test_cases_from_loop(
|
|
|
1849
1857
|
|
|
1850
1858
|
try:
|
|
1851
1859
|
for index, golden in enumerate(goldens):
|
|
1860
|
+
token = set_current_golden(golden)
|
|
1852
1861
|
current_golden_ctx.update(
|
|
1853
1862
|
{
|
|
1854
1863
|
"index": index,
|
|
@@ -1857,7 +1866,10 @@ def a_execute_agentic_test_cases_from_loop(
|
|
|
1857
1866
|
}
|
|
1858
1867
|
)
|
|
1859
1868
|
prev_task_length = len(created_tasks)
|
|
1860
|
-
|
|
1869
|
+
try:
|
|
1870
|
+
yield golden
|
|
1871
|
+
finally:
|
|
1872
|
+
reset_current_golden(token)
|
|
1861
1873
|
# if this golden created no tasks, bump bars now
|
|
1862
1874
|
if len(created_tasks) == prev_task_length:
|
|
1863
1875
|
update_pbar(progress, pbar_callback_id)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
|
|
3
|
+
try:
    from pydantic_ai.agent import Agent

    is_pydantic_ai_installed = True
except ImportError:
    # Keep this module importable without pydantic-ai: without a fallback base
    # the class statement below would fail with a NameError on ``Agent``.
    Agent = object
    is_pydantic_ai_installed = False


class DeepEvalPydanticAIAgent(Agent):
    """Deprecated subclass of ``pydantic_ai.agent.Agent``.

    It adds no behaviour of its own: construction emits a
    ``DeprecationWarning`` and then forwards every argument to the base class.
    """

    def __init__(self, *args, **kwargs):
        """Warn about the deprecation and delegate to the base initializer.

        Raises:
            ImportError: If pydantic-ai could not be imported.
        """
        if not is_pydantic_ai_installed:
            raise ImportError(
                "pydantic-ai is required for this integration. "
                "Please install it with `pip install pydantic-ai`."
            )

        # stacklevel=2 attributes the warning to the code constructing the agent.
        warnings.warn(
            "DeepEvalPydanticAIAgent is deprecated and will be removed in a future version. "
            "Please use the new ConfidentInstrumentationSettings instead. Docs: https://www.confident-ai.com/docs/integrations/third-party/pydantic-ai",
            DeprecationWarning,
            stacklevel=2,
        )

        super().__init__(*args, **kwargs)
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
from typing import Literal, Optional, List
|
|
4
|
+
|
|
5
|
+
try:
    from pydantic_ai.models.instrumented import InstrumentationSettings
    from opentelemetry.sdk.trace import SpanProcessor, TracerProvider
    from opentelemetry.sdk.trace.export import BatchSpanProcessor
    from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
        OTLPSpanExporter,
    )

    dependency_installed = True
except ImportError:
    # The classes defined later in this module subclass these two names.
    # Plain fallbacks keep the module importable, so the missing dependency
    # surfaces as the ImportError raised by is_dependency_installed() rather
    # than as a NameError at class-definition time.
    InstrumentationSettings = object
    SpanProcessor = object
    dependency_installed = False


def is_dependency_installed():
    """Check that the optional pydantic-ai / OpenTelemetry imports succeeded.

    Returns:
        True when every optional import above was successful.

    Raises:
        ImportError: If any of the optional imports failed.
    """
    if not dependency_installed:
        raise ImportError(
            "Dependencies are not installed. Please install it with `pip install pydantic-ai opentelemetry-sdk opentelemetry-exporter-otlp-proto-http`."
        )
    return True
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
from deepeval.confident.api import get_confident_api_key
|
|
27
|
+
from deepeval.prompt import Prompt
|
|
28
|
+
|
|
29
|
+
# OTLP_ENDPOINT = "http://127.0.0.1:4318/v1/traces"
|
|
30
|
+
OTLP_ENDPOINT = "https://otel.confident-ai.com/v1/traces"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class SpanInterceptor(SpanProcessor):
    """Span processor that copies Confident AI settings onto spans as they start.

    The settings object is read on every ``on_start`` call, so values changed
    on it after construction apply to spans started afterwards.
    """

    def __init__(self, settings_instance):
        # Keep a reference to the settings instance instead of copying values
        self.settings: "ConfidentInstrumentationSettings" = settings_instance

    def on_start(self, span, parent_context=None):
        """Set ``confident.*`` attributes on ``span`` from the current settings.

        Args:
            span: The span being started; must expose ``set_attribute`` and an
                ``attributes`` mapping.
            parent_context: Unused. Defaults to None to match the
                ``SpanProcessor.on_start`` interface.
        """
        settings = self.settings

        # set trace attributes
        if settings.thread_id:
            span.set_attribute("confident.trace.thread_id", settings.thread_id)
        if settings.user_id:
            span.set_attribute("confident.trace.user_id", settings.user_id)
        if settings.metadata:
            span.set_attribute(
                "confident.trace.metadata", json.dumps(settings.metadata)
            )
        if settings.tags:
            span.set_attribute("confident.trace.tags", settings.tags)
        if settings.metric_collection:
            span.set_attribute(
                "confident.trace.metric_collection",
                settings.metric_collection,
            )
        if settings.environment:
            span.set_attribute(
                "confident.trace.environment", settings.environment
            )
        if settings.name:
            span.set_attribute("confident.trace.name", settings.name)
        if settings.confident_prompt:
            prompt = settings.confident_prompt
            span.set_attribute(
                "confident.span.prompt",
                json.dumps({"alias": prompt.alias, "version": prompt.version}),
            )

        # set trace metric collection; written after ``metric_collection`` so it
        # wins when both are configured (same attribute key)
        if settings.trace_metric_collection:
            span.set_attribute(
                "confident.trace.metric_collection",
                settings.trace_metric_collection,
            )

        # The three branches below share "confident.span.metric_collection";
        # a later branch overwrites an earlier one if several match.

        # set agent name and metric collection
        agent_name = span.attributes.get("agent_name")
        if agent_name:
            span.set_attribute("confident.span.type", "agent")
            span.set_attribute("confident.span.name", agent_name)
            if settings.agent_metric_collection:
                span.set_attribute(
                    "confident.span.metric_collection",
                    settings.agent_metric_collection,
                )

        # set llm metric collection
        if span.attributes.get("gen_ai.operation.name") in [
            "chat",
            "generate_content",
            "text_completion",
        ]:
            if settings.llm_metric_collection:
                span.set_attribute(
                    "confident.span.metric_collection",
                    settings.llm_metric_collection,
                )

        # set tool metric collection
        tool_name = span.attributes.get("gen_ai.tool.name")
        if tool_name:
            # Tolerate a map that was reset to None on the settings object.
            tool_map = settings.tool_metric_collection_map or {}
            tool_metric_collection = tool_map.get(tool_name)
            if tool_metric_collection:
                span.set_attribute(
                    "confident.span.metric_collection",
                    str(tool_metric_collection),
                )

    def on_end(self, span):
        """No-op: every attribute is written when the span starts."""
        pass
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class ConfidentInstrumentationSettings(InstrumentationSettings):
    """``InstrumentationSettings`` that exports spans to the Confident AI OTLP endpoint.

    Construction builds a dedicated ``TracerProvider`` with two processors: a
    ``SpanInterceptor`` bound to this instance, and a ``BatchSpanProcessor``
    exporting to ``OTLP_ENDPOINT`` with the API key in the
    ``x-confident-api-key`` header.
    """

    name: Optional[str] = None
    thread_id: Optional[str] = None
    user_id: Optional[str] = None
    metadata: Optional[dict] = None
    tags: Optional[List[str]] = None
    # Stays None when CONFIDENT_TRACE_ENVIRONMENT holds an unrecognised value.
    environment: Optional[
        Literal["production", "staging", "development", "testing"]
    ] = None
    metric_collection: Optional[str] = None
    confident_prompt: Optional[Prompt] = None
    llm_metric_collection: Optional[str] = None
    agent_metric_collection: Optional[str] = None
    # Class-level placeholder only; __init__ always assigns a per-instance dict.
    tool_metric_collection_map: dict = {}
    trace_metric_collection: Optional[str] = None

    def __init__(
        self,
        api_key: Optional[str] = None,
        name: Optional[str] = None,
        thread_id: Optional[str] = None,
        user_id: Optional[str] = None,
        metadata: Optional[dict] = None,
        tags: Optional[List[str]] = None,
        metric_collection: Optional[str] = None,
        confident_prompt: Optional[Prompt] = None,
        llm_metric_collection: Optional[str] = None,
        agent_metric_collection: Optional[str] = None,
        tool_metric_collection_map: Optional[dict] = None,
        trace_metric_collection: Optional[str] = None,
    ):
        """Store the trace/span settings and wire up the exporting tracer provider.

        Args:
            api_key: Confident AI API key; when falsy, the value returned by
                ``get_confident_api_key()`` is used instead.
            tool_metric_collection_map: Mapping looked up by tool name to pick
                a span metric collection. ``None`` (the default) gives each
                instance its own empty dict.
            The remaining arguments are stored unchanged as attributes of the
            same name.

        Raises:
            ImportError: If the optional dependencies are missing.
            ValueError: If no API key is passed or configured.
        """
        is_dependency_installed()

        _environment = os.getenv("CONFIDENT_TRACE_ENVIRONMENT", "development")
        if _environment in ("production", "staging", "development", "testing"):
            self.environment = _environment

        # A ``{}`` default argument would be one dict shared by every instance
        # created without an explicit map; build a fresh one per instance.
        self.tool_metric_collection_map = (
            {}
            if tool_metric_collection_map is None
            else tool_metric_collection_map
        )
        self.name = name
        self.thread_id = thread_id
        self.user_id = user_id
        self.metadata = metadata
        self.tags = tags
        self.metric_collection = metric_collection
        self.confident_prompt = confident_prompt
        self.llm_metric_collection = llm_metric_collection
        self.agent_metric_collection = agent_metric_collection
        self.trace_metric_collection = trace_metric_collection

        if not api_key:
            api_key = get_confident_api_key()
            if not api_key:
                raise ValueError("CONFIDENT_API_KEY is not set")

        trace_provider = TracerProvider()

        # Pass the entire settings instance instead of individual values
        trace_provider.add_span_processor(SpanInterceptor(self))

        exporter = OTLPSpanExporter(
            endpoint=OTLP_ENDPOINT,
            headers={"x-confident-api-key": api_key},
        )
        trace_provider.add_span_processor(BatchSpanProcessor(exporter))

        super().__init__(tracer_provider=trace_provider)
|
|
@@ -1,7 +1,6 @@
|
|
|
1
|
+
import warnings
|
|
1
2
|
from typing import Optional
|
|
2
|
-
import deepeval
|
|
3
3
|
from deepeval.telemetry import capture_tracing_integration
|
|
4
|
-
from deepeval.confident.api import get_confident_api_key
|
|
5
4
|
|
|
6
5
|
try:
|
|
7
6
|
from opentelemetry import trace
|
|
@@ -28,6 +27,13 @@ OTLP_ENDPOINT = "https://otel.confident-ai.com/v1/traces"
|
|
|
28
27
|
|
|
29
28
|
|
|
30
29
|
def instrument_pydantic_ai(api_key: Optional[str] = None):
|
|
30
|
+
warnings.warn(
|
|
31
|
+
"instrument_pydantic_ai is deprecated and will be removed in a future version. "
|
|
32
|
+
"Please use the new ConfidentInstrumentationSettings instead. Docs: https://www.confident-ai.com/docs/integrations/third-party/pydantic-ai",
|
|
33
|
+
DeprecationWarning,
|
|
34
|
+
stacklevel=2,
|
|
35
|
+
)
|
|
36
|
+
|
|
31
37
|
with capture_tracing_integration("pydantic_ai"):
|
|
32
38
|
is_opentelemetry_available()
|
|
33
39
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from deepeval.openai_agents.callback_handler import DeepEvalTracingProcessor
|
|
2
|
-
from deepeval.openai_agents.runner import Runner
|
|
3
|
-
from deepeval.openai_agents.patch import function_tool
|
|
4
2
|
from deepeval.openai_agents.agent import DeepEvalAgent as Agent
|
|
3
|
+
from deepeval.openai_agents.patch import function_tool
|
|
4
|
+
|
|
5
|
+
# from deepeval.openai_agents.runner import Runner
|
|
5
6
|
|
|
6
|
-
__all__ = ["DeepEvalTracingProcessor", "
|
|
7
|
+
__all__ = ["DeepEvalTracingProcessor", "Agent", "function_tool"]
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Generic, TypeVar, List
|
|
5
|
+
|
|
6
|
+
from deepeval.prompt import Prompt
|
|
7
|
+
from deepeval.metrics import BaseMetric
|
|
8
|
+
from deepeval.tracing.types import LlmSpan
|
|
9
|
+
|
|
10
|
+
try:
|
|
11
|
+
from agents.agent import Agent as BaseAgent
|
|
12
|
+
from deepeval.openai_agents.patch import (
|
|
13
|
+
patch_default_agent_runner_get_model,
|
|
14
|
+
)
|
|
15
|
+
except Exception as e:
|
|
16
|
+
raise RuntimeError(
|
|
17
|
+
"openai-agents is required for this integration. Please install it."
|
|
18
|
+
) from e
|
|
19
|
+
|
|
20
|
+
TContext = TypeVar("TContext")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class DeepEvalAgent(BaseAgent[TContext], Generic[TContext]):
    """
    A subclass of agents.Agent that adds DeepEval-specific fields.

    Every added field defaults to None. Creating an instance calls
    patch_default_agent_runner_get_model() from deepeval.openai_agents.patch.
    """

    # NOTE(review): the fields below are only declared here; how they are
    # consumed is not visible in this module — presumably by the patched
    # runner/model code in deepeval.openai_agents.patch. Confirm there.

    # Metric collection name intended for LLM spans (assumption from the name).
    llm_metric_collection: str = None
    # Metrics intended for LLM spans (assumption from the name).
    llm_metrics: List[BaseMetric] = None
    # Prompt object associated with this agent.
    confident_prompt: Prompt = None
    # Metrics intended for the agent span (assumption from the name).
    agent_metrics: List[BaseMetric] = None
    # Metric collection name intended for the agent span (assumption from the name).
    agent_metric_collection: str = None

    def __post_init__(self):
        # Runs after the dataclass-generated __init__ on every instantiation.
        # NOTE(review): does not call a base-class __post_init__, if one exists.
        patch_default_agent_runner_get_model()
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
from deepeval.tracing.tracing import (
|
|
2
|
+
Observer,
|
|
3
|
+
current_span_context,
|
|
4
|
+
trace_manager,
|
|
5
|
+
)
|
|
6
|
+
from deepeval.openai_agents.extractors import *
|
|
7
|
+
from deepeval.tracing.context import current_trace_context
|
|
8
|
+
from deepeval.tracing.utils import make_json_serializable
|
|
9
|
+
from time import perf_counter
|
|
10
|
+
from deepeval.tracing.types import TraceSpanStatus
|
|
11
|
+
|
|
12
|
+
try:
    from agents.tracing import Span, Trace, TracingProcessor
    from agents.tracing.span_data import (
        AgentSpanData,
        CustomSpanData,
        FunctionSpanData,
        GenerationSpanData,
        GuardrailSpanData,
        HandoffSpanData,
        ResponseSpanData,
        SpanData,
    )
    from deepeval.openai_agents.patch import (
        patch_default_agent_run_single_turn,
        patch_default_agent_run_single_turn_streamed,
    )

    openai_agents_available = True
except ImportError:
    # The processor class defined later in this module subclasses
    # TracingProcessor. A plain fallback base keeps the module importable, so
    # the missing dependency is reported by _check_openai_agents_available()
    # rather than as a NameError at class-definition time.
    TracingProcessor = object
    openai_agents_available = False


def _check_openai_agents_available():
    """Raise ImportError when the openai-agents imports above failed."""
    if openai_agents_available:
        return
    raise ImportError(
        "openai-agents is required for this integration. Install it via your package manager"
    )
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class DeepEvalTracingProcessor(TracingProcessor):
    """Tracing processor that mirrors openai-agents traces and spans into DeepEval.

    Each agents trace becomes a DeepEval trace registered with
    ``trace_manager``. Each agents span is wrapped in an ``Observer`` that is
    entered on span start and exited on span end.
    """

    def __init__(self) -> None:
        """Verify openai-agents is importable and apply the runner patches."""
        _check_openai_agents_available()
        patch_default_agent_run_single_turn()
        patch_default_agent_run_single_turn_streamed()
        # Observers for spans that have started but not yet ended, by span id.
        self.span_observers: dict[str, Observer] = {}

    def on_trace_start(self, trace: "Trace") -> None:
        """Start a DeepEval trace for ``trace`` and make it the current trace."""
        trace_dict = trace.export()
        _trace_uuid = trace_dict.get("id")
        _thread_id = trace_dict.get("group_id")
        _trace_name = trace_dict.get("workflow_name")
        _trace_metadata = trace_dict.get("metadata")

        _trace = trace_manager.start_new_trace(trace_uuid=str(_trace_uuid))
        # Leave absent values as None: str(None) would store the literal
        # string "None" as the thread id / trace name.
        _trace.thread_id = None if _thread_id is None else str(_thread_id)
        _trace.name = None if _trace_name is None else str(_trace_name)
        _trace.metadata = make_json_serializable(_trace_metadata)
        current_trace_context.set(_trace)

        trace_manager.add_span(  # adds a dummy root span
            BaseSpan(
                uuid=_trace_uuid,
                trace_uuid=_trace_uuid,
                parent_uuid=None,
                start_time=perf_counter(),
                name=_trace_name,
                status=TraceSpanStatus.IN_PROGRESS,
                children=[],
            )
        )

    def on_trace_end(self, trace: "Trace") -> None:
        """Drop the dummy root span, end the trace and clear the current-trace context."""
        _trace_uuid = trace.export().get("id")

        trace_manager.remove_span(_trace_uuid)  # removing the dummy root span
        trace_manager.end_trace(_trace_uuid)
        current_trace_context.set(None)

    def on_span_start(self, span: "Span") -> None:
        """Enter an ``Observer`` for ``span`` unless it should be skipped.

        Nothing is created when the span has no ``started_at`` value, or when
        the current DeepEval span is already an ``LlmSpan``.
        """
        if not span.started_at:
            return
        current_span = current_span_context.get()
        if current_span and isinstance(current_span, LlmSpan):
            return

        span_type = self.get_span_kind(span.span_data)
        observer = Observer(span_type=span_type, func_name="NA")
        if span_type == "llm":
            observer.observe_kwargs["model"] = "temporary model"
        # The callback reads span.span_data when invoked, not when registered.
        observer.update_span_properties = (
            lambda span_type: update_span_properties(span_type, span.span_data)
        )
        self.span_observers[span.span_id] = observer
        observer.__enter__()

    def on_span_end(self, span: "Span") -> None:
        """Update trace/span properties from ``span`` and exit its observer, if any."""
        update_trace_properties_from_span_data(
            current_trace_context.get(), span.span_data
        )

        current_span = current_span_context.get()
        if current_span and isinstance(current_span, LlmSpan):
            # No observer was created for this span in on_span_start; update
            # the already-active LLM span directly instead.
            update_span_properties(current_span, span.span_data)
            return
        observer = self.span_observers.pop(span.span_id, None)
        if observer:
            observer.__exit__(None, None, None)

    def force_flush(self) -> None:
        """No-op."""
        pass

    def shutdown(self) -> None:
        """No-op."""
        pass

    def get_span_kind(self, span_data: "SpanData") -> str:
        """Map an agents span-data object to a DeepEval span type string.

        Returns one of "agent", "tool", "llm", "custom" or "base"; anything
        not matched explicitly (including custom and guardrail span data)
        is "base".
        """
        # NOTE(review): MCPListToolsSpanData is not imported by name in this
        # module; presumably it arrives via the star import from
        # deepeval.openai_agents.extractors — confirm.
        if isinstance(span_data, AgentSpanData):
            return "agent"
        if isinstance(span_data, (FunctionSpanData, MCPListToolsSpanData)):
            return "tool"
        if isinstance(span_data, (GenerationSpanData, ResponseSpanData)):
            return "llm"
        if isinstance(span_data, HandoffSpanData):
            return "custom"
        return "base"
|