azure-ai-evaluation 1.2.0__tar.gz → 1.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic. Click here for more details.
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/CHANGELOG.md +6 -0
- {azure_ai_evaluation-1.2.0/azure_ai_evaluation.egg-info → azure_ai_evaluation-1.3.0}/PKG-INFO +7 -1
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/__init__.py +1 -15
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_common/utils.py +8 -8
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_constants.py +3 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_evaluate.py +5 -2
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_exceptions.py +0 -1
- azure_ai_evaluation-1.3.0/azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +640 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_version.py +2 -1
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_adversarial_simulator.py +10 -3
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_conversation/__init__.py +4 -5
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_conversation/_conversation.py +4 -0
- azure_ai_evaluation-1.3.0/azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +2 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0/azure_ai_evaluation.egg-info}/PKG-INFO +7 -1
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure_ai_evaluation.egg-info/SOURCES.txt +5 -8
- azure_ai_evaluation-1.3.0/migration_guide.md +243 -0
- azure_ai_evaluation-1.3.0/samples/evaluation_samples_safety_evaluation.py +251 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/setup.py +1 -1
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/conftest.py +2 -1
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/test_adv_simulator.py +1 -2
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/test_builtin_evaluators.py +0 -16
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/test_mass_evaluate.py +9 -36
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/test_sim_and_eval.py +8 -5
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_evaluate.py +17 -4
- azure_ai_evaluation-1.3.0/tests/unittests/test_safety_evaluation.py +215 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_synthetic_callback_conv_bot.py +5 -4
- azure_ai_evaluation-1.2.0/azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
- azure_ai_evaluation-1.2.0/azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
- azure_ai_evaluation-1.2.0/azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -55
- azure_ai_evaluation-1.2.0/azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
- azure_ai_evaluation-1.2.0/azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
- azure_ai_evaluation-1.2.0/azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
- azure_ai_evaluation-1.2.0/azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
- azure_ai_evaluation-1.2.0/azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/MANIFEST.in +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/README.md +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/TROUBLESHOOTING.md +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_azure/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_azure/_clients.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_azure/_models.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_azure/_token_manager.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_common/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_common/_experimental.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_common/constants.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_common/math.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_common/rai_service.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_batch_run/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_batch_run/code_client.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_eval_run.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_telemetry/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_utils.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_bleu/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_bleu/_bleu.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_coherence/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_coherence/_coherence.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_common/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_common/_base_eval.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_content_safety/_violence.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_eci/_eci.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_f1_score/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_fluency/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_fluency/_fluency.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_gleu/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_gleu/_gleu.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_groundedness/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_meteor/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_meteor/_meteor.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_protected_material/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_qa/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_qa/_qa.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_relevance/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_relevance/_relevance.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_retrieval/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_rouge/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_rouge/_rouge.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_similarity/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_similarity/_similarity.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_xpia/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_xpia/xpia.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_http_utils.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_model_configurations.py +0 -0
- {azure_ai_evaluation-1.2.0/azure/ai/evaluation/_vendor → azure_ai_evaluation-1.3.0/azure/ai/evaluation/_safety_evaluation}/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_user_agent.py +0 -0
- {azure_ai_evaluation-1.2.0/azure/ai/evaluation/simulator/_data_sources → azure_ai_evaluation-1.3.0/azure/ai/evaluation/_vendor}/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_vendor/rouge_score/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/py.typed +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_adversarial_scenario.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_constants.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_conversation/constants.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_data_sources/grounding.json +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_direct_attack_simulator.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_helpers/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_model_tools/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_model_tools/_rai_client.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_model_tools/_template_handler.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_model_tools/models.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_simulator.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_tracing.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_utils.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure_ai_evaluation.egg-info/dependency_links.txt +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure_ai_evaluation.egg-info/not-zip-safe +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure_ai_evaluation.egg-info/requires.txt +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure_ai_evaluation.egg-info/top_level.txt +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/pyproject.toml +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/samples/README.md +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/samples/data/evaluate_test_data.jsonl +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/samples/evaluation_samples_common.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/samples/evaluation_samples_evaluate.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/samples/evaluation_samples_simulate.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/setup.cfg +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/__openai_patcher.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/__init__.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/custom_evaluators/answer_length_with_aggregation.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/target_fn.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/test_evaluate.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/test_lite_management_client.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/test_metrics_upload.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_batch_run_context.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_built_in_evaluator.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_content_safety_defect_rate.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_content_safety_rai_script.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_eval_run.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_evaluate_performance.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_evaluate_telemetry.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_evaluators/apology_dag/apology.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_evaluators/slow_eval.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_evaluators/test_inputs_evaluators.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_jailbreak_simulator.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_non_adv_simulator.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_save_eval.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_simulator.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_synthetic_conversation_bot.py +0 -0
- {azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_utils.py +0 -0
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
# Release History
|
|
2
2
|
|
|
3
|
+
## 1.3.0 (2025-02-28)
|
|
4
|
+
|
|
5
|
+
### Breaking Changes
|
|
6
|
+
- Multimodal-specific evaluators `ContentSafetyMultimodalEvaluator`, `ViolenceMultimodalEvaluator`, `SexualMultimodalEvaluator`, `SelfHarmMultimodalEvaluator`, `HateUnfairnessMultimodalEvaluator` and `ProtectedMaterialMultimodalEvaluator` have been removed. Please use `ContentSafetyEvaluator`, `ViolenceEvaluator`, `SexualEvaluator`, `SelfHarmEvaluator`, `HateUnfairnessEvaluator` and `ProtectedMaterialEvaluator` instead.
|
|
7
|
+
- Metric name in ProtectedMaterialEvaluator's output has been changed from `protected_material.fictional_characters_label` to `protected_material.fictional_characters_defect_rate`. It's now consistent with other evaluators' metric names (ending with `_defect_rate`).
|
|
8
|
+
|
|
3
9
|
## 1.2.0 (2025-01-27)
|
|
4
10
|
|
|
5
11
|
### Features Added
|
{azure_ai_evaluation-1.2.0/azure_ai_evaluation.egg-info → azure_ai_evaluation-1.3.0}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: azure-ai-evaluation
|
|
3
|
-
Version: 1.2.0
|
|
3
|
+
Version: 1.3.0
|
|
4
4
|
Summary: Microsoft Azure Evaluation Library for Python
|
|
5
5
|
Home-page: https://github.com/Azure/azure-sdk-for-python
|
|
6
6
|
Author: Microsoft Corporation
|
|
@@ -377,6 +377,12 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
|
|
|
377
377
|
|
|
378
378
|
# Release History
|
|
379
379
|
|
|
380
|
+
## 1.3.0 (2025-02-28)
|
|
381
|
+
|
|
382
|
+
### Breaking Changes
|
|
383
|
+
- Multimodal-specific evaluators `ContentSafetyMultimodalEvaluator`, `ViolenceMultimodalEvaluator`, `SexualMultimodalEvaluator`, `SelfHarmMultimodalEvaluator`, `HateUnfairnessMultimodalEvaluator` and `ProtectedMaterialMultimodalEvaluator` have been removed. Please use `ContentSafetyEvaluator`, `ViolenceEvaluator`, `SexualEvaluator`, `SelfHarmEvaluator`, `HateUnfairnessEvaluator` and `ProtectedMaterialEvaluator` instead.
|
|
384
|
+
- Metric name in ProtectedMaterialEvaluator's output has been changed from `protected_material.fictional_characters_label` to `protected_material.fictional_characters_defect_rate`. It's now consistent with other evaluators' metric names (ending with `_defect_rate`).
|
|
385
|
+
|
|
380
386
|
## 1.2.0 (2025-01-27)
|
|
381
387
|
|
|
382
388
|
### Features Added
|
|
@@ -12,14 +12,6 @@ from ._evaluators._content_safety import (
|
|
|
12
12
|
SexualEvaluator,
|
|
13
13
|
ViolenceEvaluator,
|
|
14
14
|
)
|
|
15
|
-
from ._evaluators._multimodal._content_safety_multimodal import (
|
|
16
|
-
ContentSafetyMultimodalEvaluator,
|
|
17
|
-
HateUnfairnessMultimodalEvaluator,
|
|
18
|
-
SelfHarmMultimodalEvaluator,
|
|
19
|
-
SexualMultimodalEvaluator,
|
|
20
|
-
ViolenceMultimodalEvaluator,
|
|
21
|
-
)
|
|
22
|
-
from ._evaluators._multimodal._protected_material import ProtectedMaterialMultimodalEvaluator
|
|
23
15
|
from ._evaluators._f1_score import F1ScoreEvaluator
|
|
24
16
|
from ._evaluators._fluency import FluencyEvaluator
|
|
25
17
|
from ._evaluators._gleu import GleuScoreEvaluator
|
|
@@ -72,11 +64,5 @@ __all__ = [
|
|
|
72
64
|
"EvaluatorConfig",
|
|
73
65
|
"Conversation",
|
|
74
66
|
"Message",
|
|
75
|
-
"EvaluationResult"
|
|
76
|
-
"ContentSafetyMultimodalEvaluator",
|
|
77
|
-
"HateUnfairnessMultimodalEvaluator",
|
|
78
|
-
"SelfHarmMultimodalEvaluator",
|
|
79
|
-
"SexualMultimodalEvaluator",
|
|
80
|
-
"ViolenceMultimodalEvaluator",
|
|
81
|
-
"ProtectedMaterialMultimodalEvaluator",
|
|
67
|
+
"EvaluationResult"
|
|
82
68
|
]
|
{azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_common/utils.py
RENAMED
|
@@ -366,7 +366,7 @@ def validate_conversation(conversation):
|
|
|
366
366
|
if not isinstance(messages, list):
|
|
367
367
|
raise_exception(
|
|
368
368
|
"'messages' parameter must be a JSON-compatible list of chat messages",
|
|
369
|
-
ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
|
|
369
|
+
ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
|
|
370
370
|
)
|
|
371
371
|
expected_roles = {"user", "assistant", "system"}
|
|
372
372
|
image_found = False
|
|
@@ -393,7 +393,7 @@ def validate_conversation(conversation):
|
|
|
393
393
|
):
|
|
394
394
|
raise_exception(
|
|
395
395
|
f"Messages must be a strongly typed class of ChatRequestMessage. Message number: {num}",
|
|
396
|
-
ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
|
|
396
|
+
ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
|
|
397
397
|
)
|
|
398
398
|
if isinstance(message, AssistantMessage):
|
|
399
399
|
assistant_message_count += 1
|
|
@@ -407,7 +407,7 @@ def validate_conversation(conversation):
|
|
|
407
407
|
if message.get("role") not in expected_roles:
|
|
408
408
|
raise_exception(
|
|
409
409
|
f"Invalid role provided: {message.get('role')}. Message number: {num}",
|
|
410
|
-
ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
|
|
410
|
+
ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
|
|
411
411
|
)
|
|
412
412
|
if message.get("role") == "assistant":
|
|
413
413
|
assistant_message_count += 1
|
|
@@ -417,7 +417,7 @@ def validate_conversation(conversation):
|
|
|
417
417
|
if not isinstance(content, (str, list)):
|
|
418
418
|
raise_exception(
|
|
419
419
|
f"Content in each turn must be a string or array. Message number: {num}",
|
|
420
|
-
ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
|
|
420
|
+
ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
|
|
421
421
|
)
|
|
422
422
|
if isinstance(content, list):
|
|
423
423
|
if any(item.get("type") == "image_url" and "url" in item.get("image_url", {}) for item in content):
|
|
@@ -425,21 +425,21 @@ def validate_conversation(conversation):
|
|
|
425
425
|
if not image_found:
|
|
426
426
|
raise_exception(
|
|
427
427
|
"Message needs to have multi-modal input like images.",
|
|
428
|
-
ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
|
|
428
|
+
ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
|
|
429
429
|
)
|
|
430
430
|
if assistant_message_count == 0:
|
|
431
431
|
raise_exception(
|
|
432
432
|
"Assistant role required in one of the messages.",
|
|
433
|
-
ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
|
|
433
|
+
ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
|
|
434
434
|
)
|
|
435
435
|
if user_message_count == 0:
|
|
436
436
|
raise_exception(
|
|
437
437
|
"User role required in one of the messages.",
|
|
438
|
-
ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
|
|
438
|
+
ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
|
|
439
439
|
)
|
|
440
440
|
if assistant_message_count > 1:
|
|
441
441
|
raise_exception(
|
|
442
442
|
"Evaluators for multimodal conversations only support single turn. "
|
|
443
443
|
"User and assistant role expected as the only role in each message.",
|
|
444
|
-
ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
|
|
444
|
+
ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
|
|
445
445
|
)
|
{azure_ai_evaluation-1.2.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_evaluate.py
RENAMED
|
@@ -147,6 +147,9 @@ def _aggregate_label_defect_metrics(df: pd.DataFrame) -> Tuple[List[str], Dict[s
|
|
|
147
147
|
"""
|
|
148
148
|
handled_metrics = [
|
|
149
149
|
EvaluationMetrics.PROTECTED_MATERIAL,
|
|
150
|
+
EvaluationMetrics.FICTIONAL_CHARACTERS,
|
|
151
|
+
EvaluationMetrics.ARTWORK,
|
|
152
|
+
EvaluationMetrics.LOGOS_AND_BRANDS,
|
|
150
153
|
_InternalEvaluationMetrics.ECI,
|
|
151
154
|
EvaluationMetrics.XPIA,
|
|
152
155
|
]
|
|
@@ -525,7 +528,7 @@ def _process_column_mappings(
|
|
|
525
528
|
|
|
526
529
|
processed_config: Dict[str, Dict[str, str]] = {}
|
|
527
530
|
|
|
528
|
-
|
|
531
|
+
expected_references = re.compile(r"^\$\{(target|data)\.[a-zA-Z_]+\}$")
|
|
529
532
|
|
|
530
533
|
if column_mapping:
|
|
531
534
|
for evaluator, mapping_config in column_mapping.items():
|
|
@@ -534,7 +537,7 @@ def _process_column_mappings(
|
|
|
534
537
|
|
|
535
538
|
for map_to_key, map_value in mapping_config.items():
|
|
536
539
|
# Check if there's any unexpected reference other than ${target.} or ${data.}
|
|
537
|
-
if
|
|
540
|
+
if not expected_references.search(map_value):
|
|
538
541
|
msg = "Unexpected references detected in 'column_mapping'. Ensure only ${target.} and ${data.} are used."
|
|
539
542
|
raise EvaluationException(
|
|
540
543
|
message=msg,
|
|
@@ -63,7 +63,6 @@ class ErrorTarget(Enum):
|
|
|
63
63
|
RAI_CLIENT = "RAIClient"
|
|
64
64
|
COHERENCE_EVALUATOR = "CoherenceEvaluator"
|
|
65
65
|
CONTENT_SAFETY_CHAT_EVALUATOR = "ContentSafetyEvaluator"
|
|
66
|
-
CONTENT_SAFETY_MULTIMODAL_EVALUATOR = "ContentSafetyMultimodalEvaluator"
|
|
67
66
|
ECI_EVALUATOR = "ECIEvaluator"
|
|
68
67
|
F1_EVALUATOR = "F1Evaluator"
|
|
69
68
|
GROUNDEDNESS_EVALUATOR = "GroundednessEvaluator"
|