azure-ai-evaluation 1.13.4__tar.gz → 1.13.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/CHANGELOG.md +6 -0
- {azure_ai_evaluation-1.13.4/azure_ai_evaluation.egg-info → azure_ai_evaluation-1.13.5}/PKG-INFO +7 -1
- azure_ai_evaluation-1.13.5/TROUBLESHOOTING.md +130 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_common/_base_eval.py +5 -2
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +2 -2
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_version.py +1 -1
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5/azure_ai_evaluation.egg-info}/PKG-INFO +7 -1
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure_ai_evaluation.egg-info/SOURCES.txt +9 -0
- azure_ai_evaluation-1.13.5/samples/agent_evaluators/coherence.ipynb +204 -0
- azure_ai_evaluation-1.13.5/samples/agent_evaluators/fluency.ipynb +283 -0
- azure_ai_evaluation-1.13.5/samples/agent_evaluators/groundedness.ipynb +332 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/samples/agent_evaluators/intent_resolution.ipynb +80 -81
- azure_ai_evaluation-1.13.5/samples/agent_evaluators/relevance.ipynb +208 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/samples/agent_evaluators/response_completeness.ipynb +36 -13
- azure_ai_evaluation-1.13.5/samples/agent_evaluators/task_adherence.ipynb +260 -0
- azure_ai_evaluation-1.13.5/samples/agent_evaluators/task_completion.ipynb +355 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/samples/agent_evaluators/task_navigation_efficiency.ipynb +42 -17
- azure_ai_evaluation-1.13.5/samples/agent_evaluators/tool_call_accuracy.ipynb +507 -0
- azure_ai_evaluation-1.13.5/samples/agent_evaluators/tool_input_accuracy.ipynb +427 -0
- azure_ai_evaluation-1.13.5/samples/agent_evaluators/tool_output_utilization.ipynb +535 -0
- azure_ai_evaluation-1.13.5/samples/agent_evaluators/tool_selection.ipynb +576 -0
- azure_ai_evaluation-1.13.5/samples/agent_evaluators/tool_success.ipynb +476 -0
- azure_ai_evaluation-1.13.4/TROUBLESHOOTING.md +0 -95
- azure_ai_evaluation-1.13.4/samples/agent_evaluators/task_adherence.ipynb +0 -245
- azure_ai_evaluation-1.13.4/samples/agent_evaluators/tool_call_accuracy.ipynb +0 -368
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/MANIFEST.in +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/README.md +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_aoai/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_aoai/aoai_grader.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_aoai/label_grader.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_aoai/python_grader.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_aoai/score_model_grader.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_aoai/string_check_grader.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_aoai/text_similarity_grader.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_azure/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_azure/_clients.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_azure/_envs.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_azure/_models.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_azure/_token_manager.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/_experimental.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/constants.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/evaluation_onedp_client.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/math.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/_client.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/_configuration.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/_model_base.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/_patch.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/_serialization.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/_types.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/_utils/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/_utils/model_base.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/_utils/serialization.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/_validation.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/_vendor.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/_version.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/aio/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/aio/_client.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/aio/_configuration.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/aio/_patch.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/models/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/models/_enums.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/models/_models.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/models/_patch.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/operations/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/operations/_operations.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/operations/_patch.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/py.typed +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/rai_service.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/raiclient/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/raiclient/_client.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/raiclient/_configuration.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/raiclient/_model_base.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/raiclient/_patch.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/raiclient/_serialization.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/raiclient/_version.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/raiclient/aio/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/raiclient/aio/_client.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/raiclient/aio/_configuration.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/raiclient/aio/_patch.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/raiclient/models/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/raiclient/models/_enums.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/raiclient/models/_models.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/raiclient/models/_patch.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/raiclient/operations/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/raiclient/operations/_operations.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/raiclient/operations/_patch.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/raiclient/py.typed +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_common/utils.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_constants.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_converters/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_converters/_ai_services.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_converters/_models.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_converters/_sk_services.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_eval_mapping.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluate/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluate/_batch_run/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluate/_batch_run/code_client.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluate/_eval_run.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluate/_evaluate.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluate/_evaluate_aoai.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluate/_telemetry/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluate/_utils.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluator_definition.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_bleu/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_bleu/_bleu.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_coherence/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_coherence/_coherence.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_common/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_content_safety/_violence.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_eci/_eci.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_f1_score/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_fluency/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_fluency/_fluency.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_gleu/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_gleu/_gleu.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_groundedness/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_meteor/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_meteor/_meteor.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_protected_material/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_qa/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_qa/_qa.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_relevance/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_relevance/_relevance.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_retrieval/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_rouge/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_rouge/_rouge.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_similarity/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_similarity/_similarity.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_task_completion/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_tool_call_success/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_tool_call_success/_tool_call_success.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_tool_call_success/tool_call_success.prompty +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_xpia/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_evaluators/_xpia/xpia.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_exceptions.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_http_utils.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_adapters/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_adapters/_check.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_adapters/_configuration.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_adapters/_constants.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_adapters/_errors.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_adapters/_flows.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_adapters/_service.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_adapters/client.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_adapters/entities.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_adapters/tracing.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_adapters/types.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_adapters/utils.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_batch_engine/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_batch_engine/_config.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_batch_engine/_engine.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_batch_engine/_result.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_batch_engine/_run.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_batch_engine/_status.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_batch_engine/_trace.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_batch_engine/_utils.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_common/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_common/_async_token_provider.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_common/_logging.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/prompty/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/prompty/_connection.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/prompty/_exceptions.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/prompty/_prompty.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/prompty/_utils.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_model_configurations.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_safety_evaluation/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_user_agent.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_vendor/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_vendor/rouge_score/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/py.typed +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_agent/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_agent/_agent_functions.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_agent/_agent_tools.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_agent/_agent_utils.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_attack_objective_generator.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_attack_strategy.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_callback_chat_target.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_default_converter.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_evaluation_processor.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_mlflow_integration.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_orchestrator_manager.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_red_team.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_red_team_result.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_result_processor.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_utils/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_utils/_rai_service_target.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_utils/constants.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_utils/exception_utils.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_utils/file_utils.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_utils/formatting_utils.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_utils/logging_utils.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_utils/metric_mapping.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_utils/objective_utils.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_utils/progress_utils.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_utils/retry_utils.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/red_team/_utils/strategy_utils.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/simulator/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/simulator/_adversarial_scenario.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/simulator/_adversarial_simulator.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/simulator/_constants.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/simulator/_conversation/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/simulator/_conversation/_conversation.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/simulator/_conversation/constants.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/simulator/_data_sources/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/simulator/_data_sources/grounding.json +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/simulator/_direct_attack_simulator.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/simulator/_helpers/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/simulator/_model_tools/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/simulator/_model_tools/_rai_client.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/simulator/_model_tools/_template_handler.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/simulator/_model_tools/models.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/simulator/_simulator.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure/ai/evaluation/simulator/_utils.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure_ai_evaluation.egg-info/dependency_links.txt +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure_ai_evaluation.egg-info/not-zip-safe +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure_ai_evaluation.egg-info/requires.txt +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure_ai_evaluation.egg-info/top_level.txt +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/migration_guide.md +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/pyproject.toml +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/samples/README.md +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/samples/agent_evaluators/agent_evaluation.ipynb +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/samples/agent_evaluators/instructions.md +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/samples/agent_evaluators/sample_synthetic_conversations.jsonl +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/samples/agent_evaluators/user_functions.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/samples/aoai_score_model_grader_sample.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/samples/data/custom_objectives_with_context_example.json +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/samples/data/evaluate_test_data.jsonl +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/samples/evaluation_samples_common.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/samples/evaluation_samples_evaluate.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/samples/evaluation_samples_evaluate_fdp.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/samples/evaluation_samples_safety_evaluation.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/samples/evaluation_samples_simulate.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/samples/evaluation_samples_threshold.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/samples/red_team_agent_tool_sample.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/samples/red_team_samples.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/samples/red_team_skip_upload.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/samples/score_model_multimodal/aoai_score_model_grader_sample_audio.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/samples/score_model_multimodal/aoai_score_model_grader_sample_audio_file.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/samples/score_model_multimodal/aoai_score_model_grader_sample_image.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/samples/score_model_multimodal/chat_compeletion_audio.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/samples/score_model_multimodal/image.jpg +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/samples/score_model_multimodal/input_audio.wav +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/samples/semantic_kernel_red_team_agent_sample.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/setup.cfg +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/setup.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/__openai_patcher.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/conftest.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/converters/ai_agent_converter/serialization_helper.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/converters/ai_agent_converter/test_ai_agent_converter_internals.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/converters/ai_agent_converter/test_run_ids_from_conversation.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/converters/ai_agent_converter/test_sk_agent_converter_internals.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/converters/ai_agent_converter/test_sk_turn_idxs_from_conversation.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/e2etests/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/e2etests/custom_evaluators/answer_length_with_aggregation.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/e2etests/target_fn.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/e2etests/test_adv_simulator.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/e2etests/test_aoai_graders.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/e2etests/test_builtin_evaluators.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/e2etests/test_evaluate.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/e2etests/test_lite_management_client.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/e2etests/test_mass_evaluate.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/e2etests/test_metrics_upload.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/e2etests/test_prompty_async.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/e2etests/test_red_team.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/e2etests/test_remote_evaluation.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/e2etests/test_sim_and_eval.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_agent_evaluators.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_aoai_alignment_missing_rows.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_aoai_data_source.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_aoai_evaluation_pagination.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_aoai_integration_features.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_aoai_nested_integration.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_aoai_python_grader.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_aoai_score_model_grader.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_batch_run_context.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_built_in_evaluator.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_completeness_evaluator.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_content_safety_defect_rate.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_content_safety_rai_script.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_document_retrieval_evaluator.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_eval_run.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_evaluate.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_evaluate_aoai.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_evaluate_mismatch.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_evaluate_performance.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_evaluator_scoring_patterns.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_evaluators/slow_eval.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_evaluators/test_conversation_thresholds.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_evaluators/test_inputs_evaluators.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_evaluators/test_service_evaluator_thresholds.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_evaluators/test_threshold_behavior.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_jailbreak_simulator.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_lazy_imports.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_non_adv_simulator.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_redteam/__init__.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_redteam/test_attack_objective_generator.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_redteam/test_attack_strategy.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_redteam/test_callback_chat_target.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_redteam/test_constants.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_redteam/test_formatting_utils.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_redteam/test_rai_service_eval_chat_target.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_redteam/test_rai_service_target.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_redteam/test_rai_service_true_false_scorer.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_redteam/test_red_team.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_redteam/test_red_team_language_support.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_redteam/test_red_team_result.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_redteam/test_strategy_utils.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_remote_evaluation_features.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_safety_evaluation.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_save_eval.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_simulator.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_synthetic_callback_conv_bot.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_synthetic_conversation_bot.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_task_completion_evaluator.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_task_navigation_efficiency_evaluators.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_tool_call_accuracy_evaluator.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_tool_input_accuracy_evaluator.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_tool_selection_evaluator.py +0 -0
- {azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/tests/unittests/test_utils.py +0 -0
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
# Release History
|
|
2
2
|
|
|
3
|
+
## 1.13.5 (2025-11-10)
|
|
4
|
+
|
|
5
|
+
### Bugs Fixed
|
|
6
|
+
|
|
7
|
+
- **TaskAdherenceEvaluator:** treat tool definitions as optional so evaluations with only query/response inputs no longer raise “Either 'conversation' or individual inputs must be provided.”
|
|
8
|
+
|
|
3
9
|
## 1.13.4 (2025-11-10)
|
|
4
10
|
|
|
5
11
|
### Bugs Fixed
|
{azure_ai_evaluation-1.13.4/azure_ai_evaluation.egg-info → azure_ai_evaluation-1.13.5}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: azure-ai-evaluation
|
|
3
|
-
Version: 1.13.4
|
|
3
|
+
Version: 1.13.5
|
|
4
4
|
Summary: Microsoft Azure Evaluation Library for Python
|
|
5
5
|
Home-page: https://github.com/Azure/azure-sdk-for-python
|
|
6
6
|
Author: Microsoft Corporation
|
|
@@ -418,6 +418,12 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
|
|
|
418
418
|
|
|
419
419
|
# Release History
|
|
420
420
|
|
|
421
|
+
## 1.13.5 (2025-11-10)
|
|
422
|
+
|
|
423
|
+
### Bugs Fixed
|
|
424
|
+
|
|
425
|
+
- **TaskAdherenceEvaluator:** treat tool definitions as optional so evaluations with only query/response inputs no longer raise “Either 'conversation' or individual inputs must be provided.”
|
|
426
|
+
|
|
421
427
|
## 1.13.4 (2025-11-10)
|
|
422
428
|
|
|
423
429
|
### Bugs Fixed
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# Troubleshoot AI Evaluation SDK Issues
|
|
2
|
+
|
|
3
|
+
This guide walks you through how to investigate failures, common errors in the `azure-ai-evaluation` SDK, and steps to mitigate these issues.
|
|
4
|
+
|
|
5
|
+
## Table of Contents
|
|
6
|
+
|
|
7
|
+
- [Handle Evaluate API Errors](#handle-evaluate-api-errors)
|
|
8
|
+
- [Troubleshoot Remote Tracking Issues](#troubleshoot-remote-tracking-issues)
|
|
9
|
+
- [Troubleshoot Column Mapping Issues](#troubleshoot-column-mapping-issues)
|
|
10
|
+
- [Troubleshoot Safety Evaluator Issues](#troubleshoot-safety-evaluator-issues)
|
|
11
|
+
- [Troubleshoot Quality Evaluator Issues](#troubleshoot-quality-evaluator-issues)
|
|
12
|
+
- [Handle Simulation Errors](#handle-simulation-errors)
|
|
13
|
+
- [Adversarial Simulation Supported Regions](#adversarial-simulation-supported-regions)
|
|
14
|
+
- [Need to generate simulations for specific harm type](#need-to-generate-simulations-for-specific-harm-type)
|
|
15
|
+
- [Simulator is slow](#simulator-is-slow)
|
|
16
|
+
- [Handle RedTeam Errors](#handle-redteam-errors)
|
|
17
|
+
- [Permission or authentication failures](#permission-or-authentication-failures)
|
|
18
|
+
- [Target resource not found](#target-resource-not-found)
|
|
19
|
+
- [Agent name not found](#agent-name-not-found)
|
|
20
|
+
- [Insufficient Storage Permissions](#insufficient-storage-permissions)
|
|
21
|
+
- [PyRIT "Error sending prompt" message](#pyrit-error-sending-prompt-message)
|
|
22
|
+
- [Logging](#logging)
|
|
23
|
+
- [Get Additional Help](#get-additional-help)
|
|
24
|
+
|
|
25
|
+
## Handle Evaluate API Errors
|
|
26
|
+
|
|
27
|
+
### Troubleshoot Remote Tracking Issues
|
|
28
|
+
|
|
29
|
+
- Before running `evaluate()`, log in by running `az login` so that logging and tracing to your Azure AI project can be enabled.
|
|
30
|
+
|
|
31
|
+
- Ensure that you assign the proper permissions to the storage account linked to your Azure AI Studio hub. This can be done with the following command. More information can be found [here](https://aka.ms/credentialleshub).
|
|
32
|
+
|
|
33
|
+
```Shell
|
|
34
|
+
# <mySubscriptionID>: Subscription ID of the Azure AI Studio hub's linked storage account (available in Azure AI hub resource view in Azure Portal).
|
|
35
|
+
# <myResourceGroupName>: Resource group of the Azure AI Studio hub's linked storage account.
|
|
36
|
+
# <user-id>: User object ID for role assignment (retrieve with "az ad user show" command).
|
|
37
|
+
|
|
38
|
+
az role assignment create --role "Storage Blob Data Contributor" --scope /subscriptions/<mySubscriptionID>/resourceGroups/<myResourceGroupName> --assignee-principal-type User --assignee-object-id "<user-id>"
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
- Additionally, if you're using a virtual network or private link, and your evaluation run upload fails because of that, check out this [guide](https://docs.microsoft.com/azure/machine-learning/how-to-enable-studio-virtual-network#access-data-using-the-studio).
|
|
42
|
+
|
|
43
|
+
### Troubleshoot Column Mapping Issues
|
|
44
|
+
|
|
45
|
+
- When using `column_mapping` parameter in evaluators, ensure all keys and values are non-empty strings and contain only alphanumeric characters. Empty strings, non-string values, or non-alphanumeric characters can cause serialization errors and issues in downstream applications. Example of valid mapping: `{"query": "${data.query}", "response": "${data.response}"}`.
|
|
46
|
+
|
|
47
|
+
### Troubleshoot Safety Evaluator Issues
|
|
48
|
+
|
|
49
|
+
- Risk and safety evaluators depend on the Azure AI Studio safety evaluation backend service. For a list of supported regions, please refer to the documentation [here](https://aka.ms/azureaisafetyeval-regionsupport).
|
|
50
|
+
- If you encounter a 403 Unauthorized error when using safety evaluators, verify that you have the `Contributor` role assigned to your Azure AI project. The `Contributor` role is currently required to run safety evaluations.
|
|
51
|
+
|
|
52
|
+
## Handle Simulation Errors
|
|
53
|
+
|
|
54
|
+
### Adversarial Simulation Supported Regions
|
|
55
|
+
|
|
56
|
+
Adversarial simulators use Azure AI Studio safety evaluation backend service to generate an adversarial dataset against your application. For a list of supported regions, please refer to the documentation [here](https://aka.ms/azureaiadvsimulator-regionsupport).
|
|
57
|
+
|
|
58
|
+
### Need to generate simulations for specific harm type
|
|
59
|
+
|
|
60
|
+
The Adversarial simulator does not support selecting individual harms. Instead, we recommend running the `AdversarialSimulator` with `max_simulation_results` set to 4x the number of simulations you need for the specific harm type.
|
|
61
|
+
|
|
62
|
+
### Simulator is slow
|
|
63
|
+
|
|
64
|
+
Identify the type of simulations being run (adversarial or non-adversarial).
|
|
65
|
+
Adjust parameters such as `api_call_retry_sleep_sec`, `api_call_delay_sec`, and `concurrent_async_task`. Please note that rate limits on LLM calls can apply to both tokens per minute and requests per minute.
|
|
66
|
+
|
|
67
|
+
## Handle RedTeam Errors
|
|
68
|
+
|
|
69
|
+
### Permission or authentication failures
|
|
70
|
+
- Run `az login` in the active shell before starting the scan and ensure the account has the **Azure AI User** role plus the `Storage Blob Data Contributor` assignment on the linked storage account. Both are required to create evaluation runs and upload artifacts.
|
|
71
|
+
- In secured hubs, confirm the linked storage account allows access from your network (or private endpoint) and that Entra ID authentication is enabled on the storage resource.
|
|
72
|
+
- If the helper warns `This may be due to missing environment variables or insufficient permissions.`, double-check the `AZURE_PROJECT_ENDPOINT`, `AGENT_NAME`, and storage role assignments before retrying.
|
|
73
|
+
|
|
74
|
+
### Target resource not found
|
|
75
|
+
- When initializing an Azure OpenAI deployment directly as the `target`, specify `azure_endpoint` as `https://<hub>.openai.azure.com/openai/deployments/<deployment_name>/chat/completions?api-version=2025-01-01-preview`.
|
|
76
|
+
- If you instantiate `AzureOpenAI`, use the resource-level endpoint format `https://<hub>.openai.azure.com/` and ensure the deployment name plus API version match an active deployment.
|
|
77
|
+
- A cloud run error such as `Error code: 404 - {'error': {'code': '404', 'message': 'Resource not found'}}` when creating the eval group can also indicate that `azure-ai-projects>=2.0.0b1` is not installed. Upgrade to that version or later to access the preview APIs used by Red Team.
|
|
78
|
+
|
|
79
|
+
### Agent name not found
|
|
80
|
+
- `(not_found) Agent <name> doesn’t exist` means the Azure AI project could not resolve the agent `name`. Names are case sensitive and differ from display names.
|
|
81
|
+
- Verify the `AZURE_PROJECT_ENDPOINT` points to the correct project and that the agent is published there.
|
|
82
|
+
- The helper below requires `os` from the standard library, `DefaultAzureCredential` from `azure.identity`, and `AIProjectClient` from `azure.ai.projects`.
|
|
83
|
+
- Use the following helper to list agents in the current project and confirm the `name` column matches your `AGENT_NAME` value:
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
def list_project_agents(endpoint: str | None = None) -> None:
|
|
87
|
+
project_endpoint = endpoint or os.environ.get("AZURE_PROJECT_ENDPOINT") or ""
|
|
88
|
+
if not project_endpoint:
|
|
89
|
+
print("Set AZURE_PROJECT_ENDPOINT before listing agents.")
|
|
90
|
+
return
|
|
91
|
+
with DefaultAzureCredential() as project_credential:
|
|
92
|
+
with AIProjectClient(
|
|
93
|
+
endpoint=project_endpoint,
|
|
94
|
+
credential=project_credential,
|
|
95
|
+
api_version="2025-11-15-preview",
|
|
96
|
+
) as project_client:
|
|
97
|
+
agents = list(project_client.agents.list())
|
|
98
|
+
if not agents:
|
|
99
|
+
print(f"No agents found in project: {project_endpoint}")
|
|
100
|
+
return
|
|
101
|
+
print(f"Agents in {project_endpoint}:")
|
|
102
|
+
for agent in agents:
|
|
103
|
+
display_name = agent.get("display_name") if isinstance(agent, dict) else getattr(agent, "display_name", "")
|
|
104
|
+
name = agent.get("name") if isinstance(agent, dict) else getattr(agent, "name", "")
|
|
105
|
+
print(f"- name: {name} | display_name: {display_name}")
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### Insufficient Storage Permissions
|
|
109
|
+
- `WARNING: Failed to log artifacts to MLFlow: (UserError) Failed to upload evaluation run to the cloud due to insufficient permission to access the storage` means the linked storage account is missing the necessary assignments.
|
|
110
|
+
- Portal steps:
|
|
111
|
+
1. Open the resource group tied to the Azure AI Project in the Azure Portal.
|
|
112
|
+
2. Locate the linked storage account(s).
|
|
113
|
+
3. Select each storage account and choose **Access control (IAM)**.
|
|
114
|
+
4. Grant the affected identity the **Storage Blob Data Contributor** role.
|
|
115
|
+
- Prefer CLI? Reuse the `az role assignment create` command described in [Troubleshoot Remote Tracking Issues](#troubleshoot-remote-tracking-issues).
|
|
116
|
+
|
|
117
|
+
### PyRIT "Error sending prompt" message
|
|
118
|
+
- `Exception: Error sending prompt with conversation ID: <guid>` is raised by PyRIT when a target LLM call fails inside the `PromptSendingOrchestrator`. The runner retries the conversation up to the configured limit, so occasional occurrences usually resolve automatically.
|
|
119
|
+
- Common triggers include transient network issues, 429 throttling, or 5xx responses from the target deployment. Even if retries succeed you will still see the stack trace in notebook output.
|
|
120
|
+
- Inspect the `redteam.log` file written to the scan output directory (typically `<working dir>/runs/<scan_id>/redteam.log`) for the underlying exception and HTTP status. Increase verbosity with `DEBUG=True` for deeper diagnostics.
|
|
121
|
+
- Running in Azure AI Studio? Navigate to **Evaluate > Red Team > &lt;run name&gt; > Logs**, download `redteam.log`, and search for the conversation ID to inspect the payload.
|
|
122
|
+
- If one conversation ID keeps failing after retries, verify the target credentials, check deployment health, and review Azure OpenAI quota or rate-limit alerts in the Azure portal.
|
|
123
|
+
|
|
124
|
+
## Logging
|
|
125
|
+
|
|
126
|
+
You can set the logging level via the environment variable `PF_LOGGING_LEVEL`. Valid values include `CRITICAL`, `ERROR`, `WARNING`, `INFO`, and `DEBUG`; the default is `INFO`.
|
|
127
|
+
|
|
128
|
+
## Get Additional Help
|
|
129
|
+
|
|
130
|
+
Additional information on ways to reach out for support can be found in the [SUPPORT.md](https://github.com/Azure/azure-sdk-for-python/blob/main/SUPPORT.md) at the root of the repo.
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
# ---------------------------------------------------------
|
|
4
4
|
|
|
5
5
|
import inspect
|
|
6
|
+
import logging
|
|
6
7
|
from abc import ABC, abstractmethod
|
|
7
8
|
import json
|
|
8
9
|
import copy
|
|
@@ -46,6 +47,8 @@ P = ParamSpec("P")
|
|
|
46
47
|
T = TypeVar("T")
|
|
47
48
|
T_EvalValue = TypeVar("T_EvalValue")
|
|
48
49
|
|
|
50
|
+
logger = logging.getLogger(__name__)
|
|
51
|
+
|
|
49
52
|
|
|
50
53
|
class DerivedEvalInput(TypedDict, total=False):
|
|
51
54
|
"""The eval input generated by EvaluatorBase._derive_conversation_starter."""
|
|
@@ -593,7 +596,7 @@ class EvaluatorBase(ABC, Generic[T_EvalValue]):
|
|
|
593
596
|
try:
|
|
594
597
|
eval_input_list = self._convert_kwargs_to_eval_input(**kwargs)
|
|
595
598
|
except Exception as e:
|
|
596
|
-
|
|
599
|
+
logger.error(f"Error converting kwargs to eval_input_list: {e}")
|
|
597
600
|
raise e
|
|
598
601
|
per_turn_results = []
|
|
599
602
|
# Evaluate all inputs.
|
|
@@ -630,7 +633,7 @@ class EvaluatorBase(ABC, Generic[T_EvalValue]):
|
|
|
630
633
|
else:
|
|
631
634
|
result[result_key] = EVALUATION_PASS_FAIL_MAPPING[False]
|
|
632
635
|
except Exception as e:
|
|
633
|
-
|
|
636
|
+
logger.warning(f"Error calculating binary result: {e}")
|
|
634
637
|
per_turn_results.append(result)
|
|
635
638
|
# Return results as-is if only one result was produced.
|
|
636
639
|
|
|
@@ -63,7 +63,7 @@ class TaskAdherenceEvaluator(PromptyEvaluatorBase[Union[str, float]]):
|
|
|
63
63
|
|
|
64
64
|
_PROMPTY_FILE = "task_adherence.prompty"
|
|
65
65
|
_RESULT_KEY = "task_adherence"
|
|
66
|
-
_OPTIONAL_PARAMS = []
|
|
66
|
+
_OPTIONAL_PARAMS = ["tool_definitions"]
|
|
67
67
|
|
|
68
68
|
_DEFAULT_TASK_ADHERENCE_SCORE = 0
|
|
69
69
|
|
|
@@ -175,7 +175,7 @@ class TaskAdherenceEvaluator(PromptyEvaluatorBase[Union[str, float]]):
|
|
|
175
175
|
if isinstance(content, list):
|
|
176
176
|
for item in content:
|
|
177
177
|
if isinstance(item, dict):
|
|
178
|
-
if item.get("type")
|
|
178
|
+
if item.get("type", None) in ("text", "input_text", "output_text"):
|
|
179
179
|
assistant_parts.append(item.get("text", ""))
|
|
180
180
|
elif item.get("type") == "tool_call":
|
|
181
181
|
tool_parts.append(str(item.get("tool_call", "")))
|
{azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5/azure_ai_evaluation.egg-info}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: azure-ai-evaluation
|
|
3
|
-
Version: 1.13.4
|
|
3
|
+
Version: 1.13.5
|
|
4
4
|
Summary: Microsoft Azure Evaluation Library for Python
|
|
5
5
|
Home-page: https://github.com/Azure/azure-sdk-for-python
|
|
6
6
|
Author: Microsoft Corporation
|
|
@@ -418,6 +418,12 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
|
|
|
418
418
|
|
|
419
419
|
# Release History
|
|
420
420
|
|
|
421
|
+
## 1.13.5 (2025-11-10)
|
|
422
|
+
|
|
423
|
+
### Bugs Fixed
|
|
424
|
+
|
|
425
|
+
- **TaskAdherenceEvaluator:** treat tool definitions as optional so evaluations with only query/response inputs no longer raise “Either 'conversation' or individual inputs must be provided.”
|
|
426
|
+
|
|
421
427
|
## 1.13.4 (2025-11-10)
|
|
422
428
|
|
|
423
429
|
### Bugs Fixed
|
{azure_ai_evaluation-1.13.4 → azure_ai_evaluation-1.13.5}/azure_ai_evaluation.egg-info/SOURCES.txt
RENAMED
|
@@ -327,13 +327,22 @@ samples/red_team_samples.py
|
|
|
327
327
|
samples/red_team_skip_upload.py
|
|
328
328
|
samples/semantic_kernel_red_team_agent_sample.py
|
|
329
329
|
samples/agent_evaluators/agent_evaluation.ipynb
|
|
330
|
+
samples/agent_evaluators/coherence.ipynb
|
|
331
|
+
samples/agent_evaluators/fluency.ipynb
|
|
332
|
+
samples/agent_evaluators/groundedness.ipynb
|
|
330
333
|
samples/agent_evaluators/instructions.md
|
|
331
334
|
samples/agent_evaluators/intent_resolution.ipynb
|
|
335
|
+
samples/agent_evaluators/relevance.ipynb
|
|
332
336
|
samples/agent_evaluators/response_completeness.ipynb
|
|
333
337
|
samples/agent_evaluators/sample_synthetic_conversations.jsonl
|
|
334
338
|
samples/agent_evaluators/task_adherence.ipynb
|
|
339
|
+
samples/agent_evaluators/task_completion.ipynb
|
|
335
340
|
samples/agent_evaluators/task_navigation_efficiency.ipynb
|
|
336
341
|
samples/agent_evaluators/tool_call_accuracy.ipynb
|
|
342
|
+
samples/agent_evaluators/tool_input_accuracy.ipynb
|
|
343
|
+
samples/agent_evaluators/tool_output_utilization.ipynb
|
|
344
|
+
samples/agent_evaluators/tool_selection.ipynb
|
|
345
|
+
samples/agent_evaluators/tool_success.ipynb
|
|
337
346
|
samples/agent_evaluators/user_functions.py
|
|
338
347
|
samples/data/custom_objectives_with_context_example.json
|
|
339
348
|
samples/data/evaluate_test_data.jsonl
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
{
|
|
2
|
+
"cells": [
|
|
3
|
+
{
|
|
4
|
+
"cell_type": "markdown",
|
|
5
|
+
"id": "da22a6e7",
|
|
6
|
+
"metadata": {},
|
|
7
|
+
"source": [
|
|
8
|
+
"# Coherence Evaluator\n",
|
|
9
|
+
"\n",
|
|
10
|
+
"### Getting Started\n",
|
|
11
|
+
"This sample demonstrates how to use the Coherence evaluator to assess the quality and readability of AI-generated responses. The evaluator measures the ability of the language model to generate text that reads naturally, flows smoothly, and resembles human-like language.\n",
|
|
12
|
+
"\n",
|
|
13
|
+
"Before you begin:\n",
|
|
14
|
+
"```bash\n",
|
|
15
|
+
"pip install azure-ai-evaluation\n",
|
|
16
|
+
"```\n",
|
|
17
|
+
"Set these environment variables with your own values:\n",
|
|
18
|
+
"1) **MODEL_DEPLOYMENT_NAME** - The deployment name of the model for this AI-assisted evaluator, as found under the \"Name\" column in the \"Models + endpoints\" tab in your Azure AI Foundry project.\n",
|
|
19
|
+
"2) **AZURE_OPENAI_ENDPOINT** - Azure OpenAI endpoint to be used for evaluation.\n",
|
|
20
|
+
"3) **AZURE_OPENAI_API_KEY** - Azure OpenAI API key to be used for evaluation.\n",
|
|
21
|
+
"4) **AZURE_OPENAI_API_VERSION** - Azure OpenAI API version to be used for evaluation."
|
|
22
|
+
]
|
|
23
|
+
},
|
|
24
|
+
{
|
|
25
|
+
"cell_type": "markdown",
|
|
26
|
+
"id": "48f8cb64",
|
|
27
|
+
"metadata": {},
|
|
28
|
+
"source": [
|
|
29
|
+
"The Coherence evaluator assesses the ability of the language model to generate text that reads naturally, flows smoothly, and resembles human-like language in its responses. It measures the readability and user-friendliness of the model's generated responses.\n",
|
|
30
|
+
"\n",
|
|
31
|
+
"Coherence scores range from 1 to 5:\n",
|
|
32
|
+
"\n",
|
|
33
|
+
"<pre>\n",
|
|
34
|
+
"Score 1: Very Poor - The response is completely incoherent with no logical flow or connection between ideas.\n",
|
|
35
|
+
"Score 2: Poor - The response has significant coherence issues with disjointed ideas and unclear transitions.\n",
|
|
36
|
+
"Score 3: Fair - The response is somewhat coherent but has noticeable gaps in logical flow or organization.\n",
|
|
37
|
+
"Score 4: Good - The response is mostly coherent with good flow and logical connections between ideas.\n",
|
|
38
|
+
"Score 5: Excellent - The response is perfectly coherent with smooth, natural flow and clear logical progression.\n",
|
|
39
|
+
"</pre>\n",
|
|
40
|
+
"\n",
|
|
41
|
+
"The evaluation requires one of the following input patterns:\n",
|
|
42
|
+
"\n",
|
|
43
|
+
"**Pattern 1: Query-Response Evaluation**\n",
|
|
44
|
+
"- Query: The user's question or prompt. (string)\n",
|
|
45
|
+
"- Response: The AI-generated response to be evaluated for coherence. (string)\n",
|
|
46
|
+
"\n",
|
|
47
|
+
"**Pattern 2: Conversation Evaluation**\n",
|
|
48
|
+
"- Conversation: A multi-turn conversation where each turn is evaluated for coherence. (dict with \"messages\" key)"
|
|
49
|
+
]
|
|
50
|
+
},
|
|
51
|
+
{
|
|
52
|
+
"cell_type": "markdown",
|
|
53
|
+
"id": "542c1340",
|
|
54
|
+
"metadata": {},
|
|
55
|
+
"source": [
|
|
56
|
+
"### Initialize Coherence Evaluator"
|
|
57
|
+
]
|
|
58
|
+
},
|
|
59
|
+
{
|
|
60
|
+
"cell_type": "code",
|
|
61
|
+
"execution_count": null,
|
|
62
|
+
"id": "35799119",
|
|
63
|
+
"metadata": {},
|
|
64
|
+
"outputs": [],
|
|
65
|
+
"source": [
|
|
66
|
+
"import os\n",
|
|
67
|
+
"from azure.ai.evaluation import CoherenceEvaluator, AzureOpenAIModelConfiguration\n",
|
|
68
|
+
"from pprint import pprint\n",
|
|
69
|
+
"\n",
|
|
70
|
+
"model_config = AzureOpenAIModelConfiguration(\n",
|
|
71
|
+
" azure_endpoint=os.environ[\"AZURE_OPENAI_ENDPOINT\"],\n",
|
|
72
|
+
" api_key=os.environ[\"AZURE_OPENAI_API_KEY\"],\n",
|
|
73
|
+
" api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"],\n",
|
|
74
|
+
" azure_deployment=os.environ[\"MODEL_DEPLOYMENT_NAME\"],\n",
|
|
75
|
+
")\n",
|
|
76
|
+
"\n",
|
|
77
|
+
"coherence_evaluator = CoherenceEvaluator(model_config=model_config)"
|
|
78
|
+
]
|
|
79
|
+
},
|
|
80
|
+
{
|
|
81
|
+
"cell_type": "markdown",
|
|
82
|
+
"id": "b10aff00",
|
|
83
|
+
"metadata": {},
|
|
84
|
+
"source": [
|
|
85
|
+
"### Samples"
|
|
86
|
+
]
|
|
87
|
+
},
|
|
88
|
+
{
|
|
89
|
+
"cell_type": "markdown",
|
|
90
|
+
"id": "724adfc8",
|
|
91
|
+
"metadata": {},
|
|
92
|
+
"source": [
|
|
93
|
+
"#### Query and Response as Strings (str)"
|
|
94
|
+
]
|
|
95
|
+
},
|
|
96
|
+
{
|
|
97
|
+
"cell_type": "code",
|
|
98
|
+
"execution_count": null,
|
|
99
|
+
"id": "82a1be93",
|
|
100
|
+
"metadata": {},
|
|
101
|
+
"outputs": [],
|
|
102
|
+
"source": [
|
|
103
|
+
"query = \"Can you explain how machine learning works?\"\n",
|
|
104
|
+
"response = \"Machine learning is a subset of artificial intelligence that enables systems to learn and improve from experience without being explicitly programmed. It works by analyzing data patterns to make predictions or decisions. First, the system is trained on a dataset, which allows it to identify patterns and relationships. Then, it uses these learned patterns to make predictions on new, unseen data. The accuracy improves over time as the model processes more data and refines its understanding.\"\n",
|
|
105
|
+
"\n",
|
|
106
|
+
"result = coherence_evaluator(query=query, response=response)\n",
|
|
107
|
+
"pprint(result)"
|
|
108
|
+
]
|
|
109
|
+
},
|
|
110
|
+
{
|
|
111
|
+
"cell_type": "markdown",
|
|
112
|
+
"id": "bc250127",
|
|
113
|
+
"metadata": {},
|
|
114
|
+
"source": [
|
|
115
|
+
"#### Example of Poor Coherence"
|
|
116
|
+
]
|
|
117
|
+
},
|
|
118
|
+
{
|
|
119
|
+
"cell_type": "code",
|
|
120
|
+
"execution_count": null,
|
|
121
|
+
"id": "62ad5478",
|
|
122
|
+
"metadata": {},
|
|
123
|
+
"outputs": [],
|
|
124
|
+
"source": [
|
|
125
|
+
"# Poor coherence example\n",
|
|
126
|
+
"query = \"What is blockchain technology?\"\n",
|
|
127
|
+
"response = \"Blockchain is decentralized. Bitcoin uses it. Blocks contain data. Cryptography is involved. Banks might use blockchain. It's secure because of the chain. Mining is how new blocks are added. Digital currencies exist. The ledger is distributed.\"\n",
|
|
128
|
+
"\n",
|
|
129
|
+
"result = coherence_evaluator(query=query, response=response)\n",
|
|
130
|
+
"pprint(result)"
|
|
131
|
+
]
|
|
132
|
+
},
|
|
133
|
+
{
|
|
134
|
+
"cell_type": "markdown",
|
|
135
|
+
"id": "1d22e3d6",
|
|
136
|
+
"metadata": {},
|
|
137
|
+
"source": [
|
|
138
|
+
"#### Conversation as Dict with Messages"
|
|
139
|
+
]
|
|
140
|
+
},
|
|
141
|
+
{
|
|
142
|
+
"cell_type": "code",
|
|
143
|
+
"execution_count": null,
|
|
144
|
+
"id": "556c197b",
|
|
145
|
+
"metadata": {},
|
|
146
|
+
"outputs": [],
|
|
147
|
+
"source": [
|
|
148
|
+
"# Multi-turn conversation\n",
|
|
149
|
+
"conversation = {\n",
|
|
150
|
+
" \"messages\": [\n",
|
|
151
|
+
" {\n",
|
|
152
|
+
" \"role\": \"user\",\n",
|
|
153
|
+
" \"content\": \"I'm planning a trip to Japan. What should I know?\"\n",
|
|
154
|
+
" },\n",
|
|
155
|
+
" {\n",
|
|
156
|
+
" \"role\": \"assistant\",\n",
|
|
157
|
+
" \"content\": \"Japan is a fascinating destination with a rich cultural heritage. First, you should know that the best times to visit are spring (March-May) for cherry blossoms and fall (September-November) for autumn colors. The country has an excellent public transportation system, particularly the Shinkansen bullet train network. Additionally, learning a few basic Japanese phrases will be appreciated by locals, though many signs in major cities have English translations.\"\n",
|
|
158
|
+
" },\n",
|
|
159
|
+
" {\n",
|
|
160
|
+
" \"role\": \"user\",\n",
|
|
161
|
+
" \"content\": \"What about the food?\"\n",
|
|
162
|
+
" },\n",
|
|
163
|
+
" {\n",
|
|
164
|
+
" \"role\": \"assistant\",\n",
|
|
165
|
+
" \"content\": \"Japanese cuisine is incredibly diverse and delicious. Beyond the well-known sushi and ramen, you should try regional specialties like okonomiyaki in Osaka or motsunabe in Fukuoka. Most restaurants display plastic food models in their windows, which makes ordering easier even with a language barrier. It's also important to note that tipping is not customary in Japan and can sometimes be considered rude.\"\n",
|
|
166
|
+
" },\n",
|
|
167
|
+
" {\n",
|
|
168
|
+
" \"role\": \"user\",\n",
|
|
169
|
+
" \"content\": \"Are there any cultural customs I should be aware of?\"\n",
|
|
170
|
+
" },\n",
|
|
171
|
+
" {\n",
|
|
172
|
+
" \"role\": \"assistant\",\n",
|
|
173
|
+
" \"content\": \"Yes, there are several important customs to observe. Always remove your shoes when entering homes, temples, and some traditional restaurants. Bowing is a common greeting, though a slight nod is acceptable for foreigners. When using chopsticks, never stick them upright in rice as this resembles a funeral ritual. Finally, be mindful of noise levels on public transportation, as talking on phones or having loud conversations is considered impolite.\"\n",
|
|
174
|
+
" }\n",
|
|
175
|
+
" ]\n",
|
|
176
|
+
"}\n",
|
|
177
|
+
"\n",
|
|
178
|
+
"result = coherence_evaluator(conversation=conversation)\n",
|
|
179
|
+
"pprint(result)"
|
|
180
|
+
]
|
|
181
|
+
}
|
|
182
|
+
],
|
|
183
|
+
"metadata": {
|
|
184
|
+
"kernelspec": {
|
|
185
|
+
"display_name": "test_agent_evaluator_prp",
|
|
186
|
+
"language": "python",
|
|
187
|
+
"name": "python3"
|
|
188
|
+
},
|
|
189
|
+
"language_info": {
|
|
190
|
+
"codemirror_mode": {
|
|
191
|
+
"name": "ipython",
|
|
192
|
+
"version": 3
|
|
193
|
+
},
|
|
194
|
+
"file_extension": ".py",
|
|
195
|
+
"mimetype": "text/x-python",
|
|
196
|
+
"name": "python",
|
|
197
|
+
"nbconvert_exporter": "python",
|
|
198
|
+
"pygments_lexer": "ipython3",
|
|
199
|
+
"version": "3.12.9"
|
|
200
|
+
}
|
|
201
|
+
},
|
|
202
|
+
"nbformat": 4,
|
|
203
|
+
"nbformat_minor": 5
|
|
204
|
+
}
|