azure-ai-evaluation 1.4.0__tar.gz → 1.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic.
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/CHANGELOG.md +6 -0
- {azure_ai_evaluation-1.4.0/azure_ai_evaluation.egg-info → azure_ai_evaluation-1.5.0}/PKG-INFO +13 -2
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/__init__.py +0 -16
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/rai_service.py +1 -1
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/utils.py +1 -1
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_converters/_ai_services.py +4 -4
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +9 -4
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +42 -22
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +1 -1
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluate/_eval_run.py +1 -1
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluate/_evaluate.py +84 -68
- azure_ai_evaluation-1.5.0/azure/ai/evaluation/_evaluate/_telemetry/__init__.py +95 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluate/_utils.py +3 -3
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_common/_base_eval.py +1 -1
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +1 -1
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +1 -1
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +1 -1
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +1 -0
- azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy/_adapters/__init__.py +21 -0
- azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
- azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
- azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
- azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
- azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
- azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
- azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
- azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
- azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
- azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/_result.py +1 -1
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/_status.py +1 -1
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_version.py +1 -1
- azure_ai_evaluation-1.5.0/azure/ai/evaluation/red_team/__init__.py +19 -0
- {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.5.0/azure/ai/evaluation/red_team}/_attack_objective_generator.py +3 -0
- {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.5.0/azure/ai/evaluation/red_team}/_attack_strategy.py +3 -0
- {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.5.0/azure/ai/evaluation/red_team}/_red_team.py +96 -67
- azure_ai_evaluation-1.5.0/azure/ai/evaluation/red_team/_red_team_result.py +382 -0
- {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.5.0/azure/ai/evaluation/red_team}/_utils/constants.py +2 -1
- {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.5.0/azure/ai/evaluation/red_team}/_utils/formatting_utils.py +23 -22
- {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.5.0/azure/ai/evaluation/red_team}/_utils/logging_utils.py +1 -1
- {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.5.0/azure/ai/evaluation/red_team}/_utils/strategy_utils.py +8 -4
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_simulator.py +1 -1
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0/azure_ai_evaluation.egg-info}/PKG-INFO +13 -2
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure_ai_evaluation.egg-info/SOURCES.txt +23 -15
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure_ai_evaluation.egg-info/requires.txt +6 -1
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/agent_evaluators/instructions.md +1 -1
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/agent_evaluators/task_adherence.ipynb +1 -1
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/red_team_samples.py +1 -1
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/setup.py +11 -1
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/conftest.py +4 -35
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/converters/ai_agent_converter/test_run_ids_from_conversation.py +3 -3
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/e2etests/test_metrics_upload.py +10 -3
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_batch_run_context.py +6 -3
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_evaluate.py +1 -7
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_redteam/test_attack_objective_generator.py +1 -1
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_redteam/test_attack_strategy.py +1 -1
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_redteam/test_callback_chat_target.py +2 -2
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_redteam/test_constants.py +3 -3
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_redteam/test_formatting_utils.py +4 -12
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_redteam/test_red_team.py +48 -41
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_redteam/test_red_team_result.py +1 -1
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_redteam/test_strategy_utils.py +16 -12
- azure_ai_evaluation-1.4.0/azure/ai/evaluation/_evaluate/_telemetry/__init__.py +0 -179
- azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team/_red_team_result.py +0 -246
- azure_ai_evaluation-1.4.0/azure/ai/evaluation/simulator/_data_sources/__init__.py +0 -3
- azure_ai_evaluation-1.4.0/azure/ai/evaluation/simulator/_tracing.py +0 -89
- azure_ai_evaluation-1.4.0/tests/unittests/test_evaluate_telemetry.py +0 -168
- azure_ai_evaluation-1.4.0/tests/unittests/test_evaluators/apology_dag/apology.py +0 -8
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/MANIFEST.in +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/README.md +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/TROUBLESHOOTING.md +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_azure/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_azure/_clients.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_azure/_models.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_azure/_token_manager.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/_experimental.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/constants.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/math.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/_client.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/_configuration.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/_model_base.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/_patch.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/_serialization.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/_version.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/aio/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/aio/_client.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/aio/_configuration.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/aio/_patch.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/models/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/models/_enums.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/models/_models.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/models/_patch.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/operations/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/operations/_operations.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/operations/_patch.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/raiclient/py.typed +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_constants.py +0 -0
- {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.5.0/azure/ai/evaluation/_converters}/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_converters/_models.py +0 -0
- {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_converters → azure_ai_evaluation-1.5.0/azure/ai/evaluation/_evaluate}/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluate/_batch_run/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +0 -0
- {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_evaluate → azure_ai_evaluation-1.5.0/azure/ai/evaluation/_evaluators}/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_bleu/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_bleu/_bleu.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_coherence/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_coherence/_coherence.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_common/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_content_safety/_violence.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_eci/_eci.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_f1_score/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_fluency/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_fluency/_fluency.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_gleu/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_gleu/_gleu.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_groundedness/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_meteor/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_meteor/_meteor.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_protected_material/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_qa/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_qa/_qa.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_relevance/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_relevance/_relevance.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_retrieval/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_rouge/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_rouge/_rouge.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_similarity/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_similarity/_similarity.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_xpia/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluators/_xpia/xpia.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_exceptions.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_http_utils.py +0 -0
- {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_evaluators → azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy}/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/_config.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/_engine.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/_logging.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/_run.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/_trace.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/_utils.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/prompty/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/prompty/_connection.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/prompty/_exceptions.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/prompty/_prompty.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/prompty/_utils.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_model_configurations.py +0 -0
- {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team/_utils → azure_ai_evaluation-1.5.0/azure/ai/evaluation/_safety_evaluation}/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_user_agent.py +0 -0
- {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_legacy → azure_ai_evaluation-1.5.0/azure/ai/evaluation/_vendor}/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_vendor/rouge_score/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/py.typed +0 -0
- {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.5.0/azure/ai/evaluation/red_team}/_callback_chat_target.py +0 -0
- {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_red_team → azure_ai_evaluation-1.5.0/azure/ai/evaluation/red_team}/_default_converter.py +0 -0
- {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_safety_evaluation → azure_ai_evaluation-1.5.0/azure/ai/evaluation/red_team/_utils}/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_adversarial_scenario.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_adversarial_simulator.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_constants.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_conversation/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_conversation/_conversation.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_conversation/constants.py +0 -0
- {azure_ai_evaluation-1.4.0/azure/ai/evaluation/_vendor → azure_ai_evaluation-1.5.0/azure/ai/evaluation/simulator/_data_sources}/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_data_sources/grounding.json +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_direct_attack_simulator.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_helpers/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_model_tools/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_model_tools/_rai_client.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_model_tools/_template_handler.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_model_tools/models.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/simulator/_utils.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure_ai_evaluation.egg-info/dependency_links.txt +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure_ai_evaluation.egg-info/not-zip-safe +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure_ai_evaluation.egg-info/top_level.txt +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/migration_guide.md +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/pyproject.toml +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/README.md +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/agent_evaluators/agent_evaluation.ipynb +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/agent_evaluators/intent_resolution.ipynb +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/agent_evaluators/response_completeness.ipynb +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/agent_evaluators/sample_synthetic_conversations.jsonl +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/agent_evaluators/tool_call_accuracy.ipynb +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/agent_evaluators/user_functions.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/data/evaluate_test_data.jsonl +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/evaluation_samples_common.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/evaluation_samples_evaluate.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/evaluation_samples_safety_evaluation.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/evaluation_samples_simulate.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/samples/evaluation_samples_threshold.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/setup.cfg +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/__openai_patcher.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/converters/ai_agent_converter/serialization_helper.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/converters/ai_agent_converter/test_ai_agent_converter_internals.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/e2etests/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/e2etests/custom_evaluators/answer_length_with_aggregation.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/e2etests/target_fn.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/e2etests/test_adv_simulator.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/e2etests/test_builtin_evaluators.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/e2etests/test_evaluate.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/e2etests/test_lite_management_client.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/e2etests/test_mass_evaluate.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/e2etests/test_prompty_async.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/e2etests/test_sim_and_eval.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_agent_evaluators.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_built_in_evaluator.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_completeness_evaluator.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_content_safety_defect_rate.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_content_safety_rai_script.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_eval_run.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_evaluate_performance.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_evaluators/slow_eval.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_evaluators/test_conversation_thresholds.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_evaluators/test_inputs_evaluators.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_evaluators/test_service_evaluator_thresholds.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_evaluators/test_threshold_behavior.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_jailbreak_simulator.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_non_adv_simulator.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_redteam/__init__.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_safety_evaluation.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_save_eval.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_simulator.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_synthetic_callback_conv_bot.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_synthetic_conversation_bot.py +0 -0
- {azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/tests/unittests/test_utils.py +0 -0
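The headline change in this release is the red-teaming surface: the private `_red_team` package listed above becomes the public `azure.ai.evaluation.red_team` subpackage, with a new `red_team/__init__.py` and an expanded `_red_team_result.py`, and the `redteam` extra now pins `pyrit==0.8.1`. As a hedged sketch of how the new entry points are meant to be wired up (constructor and `scan` parameters are assumptions drawn from `samples/red_team_samples.py` in this package, not a verbatim excerpt from this diff):

import asyncio
from azure.identity import DefaultAzureCredential
from azure.ai.evaluation.red_team import RedTeam, AttackStrategy, RiskCategory  # new public module in 1.5.0

async def main():
    # Project details and parameter names below are assumptions based on the package samples.
    agent = RedTeam(
        azure_ai_project={"subscription_id": "...", "resource_group_name": "...", "project_name": "..."},
        credential=DefaultAzureCredential(),
        risk_categories=[RiskCategory.Violence],
        num_objectives=1,
    )
    # The scan target can be a plain callback that returns the application's response to a prompt.
    result = await agent.scan(
        target=lambda query: "I cannot help with that.",
        attack_strategies=[AttackStrategy.Base64],
    )
    print(result)

asyncio.run(main())

The detailed per-file diffs follow.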
{azure_ai_evaluation-1.4.0/azure_ai_evaluation.egg-info → azure_ai_evaluation-1.5.0}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: azure-ai-evaluation
-Version: 1.4.0
+Version: 1.5.0
 Summary: Microsoft Azure Evaluation Library for Python
 Home-page: https://github.com/Azure/azure-sdk-for-python
 Author: Microsoft Corporation
@@ -28,8 +28,13 @@ Requires-Dist: azure-identity>=1.16.0
 Requires-Dist: azure-core>=1.30.2
 Requires-Dist: nltk>=3.9.1
 Requires-Dist: azure-storage-blob>=12.10.0
+Requires-Dist: httpx>=0.25.1
+Requires-Dist: pandas<3.0.0,>=2.1.2
+Requires-Dist: openai>=1.40.0
+Requires-Dist: ruamel.yaml<1.0.0,>=0.17.10
+Requires-Dist: msrest>=0.6.21
 Provides-Extra: redteam
-Requires-Dist: pyrit
+Requires-Dist: pyrit==0.8.1; extra == "redteam"

 # Azure AI Evaluation client library for Python

@@ -376,6 +381,12 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con

 # Release History

+## 1.5.0 (2025-04-04)
+
+### Features Added
+
+- New `RedTeam` agent functionality to assess the safety and resilience of AI systems against adversarial prompt attacks
+
 ## 1.4.0 (2025-03-27)

 ### Features Added
{azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/__init__.py
RENAMED
@@ -52,22 +52,6 @@ try:
 except ImportError:
     print("[INFO] Could not import AIAgentConverter. Please install the dependency with `pip install azure-ai-projects`.")

-# RedTeam requires a dependency on pyrit, but python 3.9 is not supported by pyrit.
-# So we only import it if it's available and the user has pyrit.
-try:
-    from ._red_team._red_team import RedTeam
-    from ._red_team._attack_strategy import AttackStrategy
-    from ._red_team._attack_objective_generator import RiskCategory
-    from ._red_team._red_team_result import RedTeamOutput
-    _patch_all.extend([
-        "RedTeam",
-        "RedTeamOutput",
-        "AttackStrategy",
-        "RiskCategory",
-    ])
-except ImportError:
-    print("[INFO] Could not import RedTeam. Please install the dependency with `pip install azure-ai-evaluation[redteam]`.")
-

 __all__ = [
     "evaluate",
{azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/rai_service.py
RENAMED
@@ -15,7 +15,7 @@ from string import Template

 import jwt

-from
+from azure.ai.evaluation._legacy._adapters._errors import MissingRequiredPackage
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._http_utils import AsyncHttpPipeline, get_async_http_client
 from azure.ai.evaluation._model_configurations import AzureAIProject
{azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_common/utils.py
RENAMED
@@ -9,7 +9,7 @@ from typing import Any, List, Literal, Mapping, Type, TypeVar, Tuple, Union, cas

 import nltk
 from typing_extensions import NotRequired, Required, TypeGuard
-from
+from azure.ai.evaluation._legacy._adapters._errors import MissingRequiredPackage
 from azure.ai.evaluation._constants import AZURE_OPENAI_TYPE, OPENAI_TYPE
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._model_configurations import (
{azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_converters/_ai_services.py
RENAMED
@@ -667,7 +667,7 @@ class AIAgentConverter:
         return evaluations

     @staticmethod
-    def
+    def _run_ids_from_conversation(conversation: dict) -> List[str]:
         """
         Extracts a list of unique run IDs from a conversation dictionary.

@@ -684,7 +684,7 @@ class AIAgentConverter:
         return run_ids

     @staticmethod
-    def
+    def _convert_from_conversation(
         conversation: dict, run_id: str, exclude_tool_calls_previous_runs: bool = False
     ) -> dict:
         """
@@ -765,7 +765,7 @@ class AIAgentConverter:
         return json.loads(final_result.to_json())

     @staticmethod
-    def
+    def _convert_from_file(filename: str, run_id: str) -> dict:
         """
         Converts the agent run from a JSON file to a format suitable for the OpenAI API, the JSON file being a thread.

@@ -801,4 +801,4 @@ class AIAgentConverter:
         with open(filename, mode="r", encoding="utf-8") as file:
             data = json.load(file)

-        return AIAgentConverter.
+        return AIAgentConverter._convert_from_conversation(data, run_id)
{azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluate/_batch_run/code_client.py
RENAMED
@@ -6,17 +6,17 @@ import json
 import logging
 import os
 from concurrent.futures import Future
-from
-from typing import Any, Callable, Dict, Optional, Union, cast
+from typing import Any, Callable, Dict, Optional, Sequence, Union, cast

 import pandas as pd
-from
-from
+from azure.ai.evaluation._legacy._adapters.types import AttrDict
+from azure.ai.evaluation._legacy._adapters.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor

 from azure.ai.evaluation._evaluate._utils import _apply_column_mapping, _has_aggregator, get_int_env_var, load_jsonl
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException

 from ..._constants import PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT
+from .batch_clients import BatchClientRun

 LOGGER = logging.getLogger(__name__)

@@ -84,7 +84,7 @@ class CodeClient: # pylint: disable=client-accepts-api-version-keyword
             for param in inspect.signature(evaluator).parameters.values()
             if param.name not in ["args", "kwargs"]
         }
-        for value in input_df.to_dict("records"):
+        for value in cast(Sequence[Dict[str, Any]], input_df.to_dict("records")):
             # Filter out only the parameters that are present in the input data
             # if no parameters then pass data as is
             filtered_values = {k: v for k, v in value.items() if k in parameters} if len(parameters) > 0 else value
@@ -133,10 +133,10 @@ class CodeClient: # pylint: disable=client-accepts-api-version-keyword
     def run(
         self, # pylint: disable=unused-argument
         flow: Callable,
-        data: Union[os.PathLike,
-        evaluator_name: Optional[str] = None,
+        data: Union[str, os.PathLike, pd.DataFrame],
         column_mapping: Optional[Dict[str, str]] = None,
-
+        evaluator_name: Optional[str] = None,
+        **kwargs: Any,
     ) -> CodeRun:
         input_df = data
         if not isinstance(input_df, pd.DataFrame):
@@ -157,7 +157,7 @@ class CodeClient: # pylint: disable=client-accepts-api-version-keyword
             evaluator=flow,
             input_df=input_df,
             column_mapping=column_mapping,
-            evaluator_name=evaluator_name,
+            evaluator_name=evaluator_name or "",
         )

         return CodeRun(
@@ -169,11 +169,13 @@ class CodeClient: # pylint: disable=client-accepts-api-version-keyword
             ),
         )

-    def get_details(self,
+    def get_details(self, client_run: BatchClientRun, all_results: bool = False) -> pd.DataFrame:
+        run = self._get_result(client_run)
         result_df = run.get_result_df(exclude_inputs=not all_results)
         return result_df

-    def get_metrics(self,
+    def get_metrics(self, client_run: BatchClientRun) -> Dict[str, Any]:
+        run = self._get_result(client_run)
         try:
             aggregated_metrics = run.get_aggregated_metrics()
             print("Aggregated metrics")
@@ -183,6 +185,10 @@ class CodeClient: # pylint: disable=client-accepts-api-version-keyword
             return {}
         return aggregated_metrics

-    def get_run_summary(self,
+    def get_run_summary(self, client_run: BatchClientRun) -> Any: # pylint: disable=unused-argument
         # Not implemented
         return None
+
+    @staticmethod
+    def _get_result(run: BatchClientRun) -> CodeRun:
+        return cast(CodeRun, run)
{azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py
RENAMED
@@ -5,9 +5,9 @@ import os
 import types
 from typing import Optional, Type, Union

-from
-from
-from
+from azure.ai.evaluation._legacy._adapters._constants import PF_FLOW_ENTRY_IN_TMP, PF_FLOW_META_LOAD_IN_SUBPROCESS
+from azure.ai.evaluation._legacy._adapters.utils import ClientUserAgentUtil
+from azure.ai.evaluation._legacy._adapters.tracing import inject_openai_api, recover_openai_api

 from azure.ai.evaluation._constants import (
     OTEL_EXPORTER_OTLP_TRACES_TIMEOUT,
@@ -19,6 +19,8 @@ from azure.ai.evaluation._constants import (

 from ..._user_agent import USER_AGENT
 from .._utils import set_event_loop_policy
+from .batch_clients import BatchClient
+from ._run_submitter_client import RunSubmitterClient
 from .code_client import CodeClient
 from .proxy_client import ProxyClient

@@ -33,7 +35,7 @@ class EvalRunContext:
     ]
     """

-    def __init__(self, client:
+    def __init__(self, client: BatchClient) -> None:
         self.client = client
         self._is_batch_timeout_set_by_system = False
         self._is_otel_timeout_set_by_system = False
@@ -64,6 +66,9 @@ class EvalRunContext:
         # For addressing the issue of asyncio event loop closed on Windows
         set_event_loop_policy()

+        if isinstance(self.client, RunSubmitterClient):
+            set_event_loop_policy()
+
     def __exit__(
         self,
         exc_type: Optional[Type[BaseException]],
{azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py
RENAMED
@@ -8,15 +8,21 @@ import inspect
 import logging
 import math
 import os
+from datetime import datetime
 from collections import OrderedDict
 from concurrent.futures import Future
-from typing import Any, Callable, Dict, Optional, Union
+from typing import Any, Callable, Dict, Optional, Union, cast

+from azure.ai.evaluation._legacy._adapters.entities import Run
+from azure.ai.evaluation._legacy._adapters._configuration import Configuration
+from azure.ai.evaluation._legacy._adapters.client import PFClient
+from azure.ai.evaluation._legacy._adapters.tracing import ThreadPoolExecutorWithContext
 import pandas as pd
-from promptflow.client import PFClient
-from promptflow.entities import Run
-from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor

+from azure.ai.evaluation._evaluate._batch_run.batch_clients import BatchClientRun, HasAsyncCallable
+
+
+Configuration.get_instance().set_config("trace.destination", "none")
 LOGGER = logging.getLogger(__name__)


@@ -26,46 +32,56 @@ class ProxyRun:


 class ProxyClient: # pylint: disable=client-accepts-api-version-keyword
-    def __init__( # pylint: disable=missing-client-constructor-parameter-credential
-        self,
+    def __init__( # pylint: disable=missing-client-constructor-parameter-credential
+        self,
+        **kwargs: Any,
     ) -> None:
-        self._pf_client =
-        self._thread_pool =
+        self._pf_client = PFClient(**kwargs)
+        self._thread_pool = ThreadPoolExecutorWithContext(thread_name_prefix="evaluators_thread")

     def run(
         self,
-        flow:
-        data: Union[str, os.PathLike],
+        flow: Callable,
+        data: Union[str, os.PathLike, pd.DataFrame],
         column_mapping: Optional[Dict[str, str]] = None,
-
+        evaluator_name: Optional[str] = None,
+        **kwargs: Any,
     ) -> ProxyRun:
-
-
+        if isinstance(data, pd.DataFrame):
+            raise ValueError("Data cannot be a pandas DataFrame")
+
+        flow_to_run: Callable = flow
+        if os.getenv("AI_EVALS_BATCH_USE_ASYNC", "true").lower() == "true" and isinstance(flow, HasAsyncCallable):
             flow_to_run = flow._to_async() # pylint: disable=protected-access

+        name: str = kwargs.pop("name", "")
+        if not name:
+            name = f"azure_ai_evaluation_evaluators_{evaluator_name}_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}"
+
         batch_use_async = self._should_batch_use_async(flow_to_run)
         eval_future = self._thread_pool.submit(
             self._pf_client.run,
             flow_to_run,
             data=data,
-            column_mapping=column_mapping,
+            column_mapping=column_mapping, # type: ignore
             batch_use_async=batch_use_async,
-
+            name=name,
+            **kwargs,
         )
         return ProxyRun(run=eval_future)

-    def get_details(self,
-        run: Run =
+    def get_details(self, client_run: BatchClientRun, all_results: bool = False) -> pd.DataFrame:
+        run: Run = self.get_result(client_run)
         result_df = self._pf_client.get_details(run, all_results=all_results)
         result_df.replace("(Failed)", math.nan, inplace=True)
         return result_df

-    def get_metrics(self,
-        run: Run =
+    def get_metrics(self, client_run: BatchClientRun) -> Dict[str, Any]:
+        run: Run = self.get_result(client_run)
         return self._pf_client.get_metrics(run)

-    def get_run_summary(self,
-        run =
+    def get_run_summary(self, client_run: BatchClientRun) -> Dict[str, Any]:
+        run: Run = self.get_result(client_run)

         # pylint: disable=protected-access
         completed_lines = run._properties.get("system_metrics", {}).get("__pf__.lines.completed", "NA")
@@ -81,13 +97,17 @@ class ProxyClient: # pylint: disable=client-accepts-api-version-keyword
         return OrderedDict(
             [
                 ("status", status),
-                ("duration", str(run._end_time - run._created_on)),
+                ("duration", str((run._end_time or run._created_on) - run._created_on)),
                 ("completed_lines", completed_lines),
                 ("failed_lines", failed_lines),
                 ("log_path", str(run._output_path)),
             ]
         )

+    @staticmethod
+    def get_result(run: BatchClientRun) -> Run:
+        return cast(ProxyRun, run).run.result()
+
     @staticmethod
     def _should_batch_use_async(flow):
         if os.getenv("AI_EVALS_BATCH_USE_ASYNC", "true").lower() == "true":
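One behavioural detail worth calling out from the ProxyClient changes above: when no `name` is supplied to `run`, a default run name is now derived from the evaluator name and a timestamp. The snippet below only reproduces that format locally; the evaluator name is a placeholder.

from datetime import datetime

evaluator_name = "relevance"  # placeholder
name = f"azure_ai_evaluation_evaluators_{evaluator_name}_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}"
print(name)  # e.g. azure_ai_evaluation_evaluators_relevance_20250404_101530_123456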
{azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py
RENAMED
@@ -5,7 +5,7 @@ import os
 import types
 from typing import Optional, Type

-from
+from azure.ai.evaluation._legacy._adapters._constants import PF_FLOW_ENTRY_IN_TMP
 from azure.ai.evaluation._constants import PF_DISABLE_TRACING

{azure_ai_evaluation-1.4.0 → azure_ai_evaluation-1.5.0}/azure/ai/evaluation/_evaluate/_eval_run.py
RENAMED
@@ -13,7 +13,7 @@ import uuid
 from typing import Any, Dict, List, Optional, Set, Type
 from urllib.parse import urlparse

-from
+from azure.ai.evaluation._legacy._adapters.entities import Run
 from typing_extensions import Self

 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException