azure-ai-evaluation 1.1.0__tar.gz → 1.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic. Click here for more details.
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/CHANGELOG.md +72 -1
- {azure_ai_evaluation-1.1.0/azure_ai_evaluation.egg-info → azure_ai_evaluation-1.3.0}/PKG-INFO +77 -7
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/README.md +1 -1
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/__init__.py +1 -15
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_azure/_clients.py +24 -8
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_azure/_models.py +2 -2
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_common/utils.py +8 -8
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_constants.py +21 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_batch_run/__init__.py +2 -1
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_eval_run.py +3 -1
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_evaluate.py +74 -14
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_utils.py +27 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_bleu/_bleu.py +46 -25
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_common/__init__.py +2 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_common/_base_eval.py +69 -4
- azure_ai_evaluation-1.3.0/azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +61 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +7 -1
- azure_ai_evaluation-1.3.0/azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +5 -42
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +2 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +2 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +2 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_content_safety/_violence.py +2 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +61 -68
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_gleu/_gleu.py +45 -23
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_meteor/_meteor.py +55 -34
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_qa/_qa.py +32 -27
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_rouge/_rouge.py +44 -23
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_similarity/_similarity.py +41 -81
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_exceptions.py +0 -1
- azure_ai_evaluation-1.3.0/azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +640 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_version.py +2 -1
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_adversarial_simulator.py +10 -3
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_conversation/__init__.py +4 -5
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_conversation/_conversation.py +4 -0
- azure_ai_evaluation-1.3.0/azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +2 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_simulator.py +21 -13
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0/azure_ai_evaluation.egg-info}/PKG-INFO +77 -7
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure_ai_evaluation.egg-info/SOURCES.txt +7 -9
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure_ai_evaluation.egg-info/requires.txt +2 -2
- azure_ai_evaluation-1.3.0/migration_guide.md +243 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/samples/README.md +1 -1
- azure_ai_evaluation-1.3.0/samples/evaluation_samples_safety_evaluation.py +251 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/samples/evaluation_samples_simulate.py +1 -1
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/setup.py +4 -5
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/conftest.py +25 -2
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/test_adv_simulator.py +1 -2
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/test_builtin_evaluators.py +0 -16
- azure_ai_evaluation-1.3.0/tests/e2etests/test_evaluate.py +501 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/test_lite_management_client.py +12 -3
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/test_mass_evaluate.py +89 -91
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/test_metrics_upload.py +11 -1
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/test_sim_and_eval.py +8 -5
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_built_in_evaluator.py +1 -1
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_evaluate.py +206 -35
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_evaluate_performance.py +9 -13
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_evaluators/test_inputs_evaluators.py +42 -0
- azure_ai_evaluation-1.3.0/tests/unittests/test_safety_evaluation.py +215 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_save_eval.py +6 -4
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_synthetic_callback_conv_bot.py +5 -4
- azure_ai_evaluation-1.1.0/azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
- azure_ai_evaluation-1.1.0/azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
- azure_ai_evaluation-1.1.0/azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -55
- azure_ai_evaluation-1.1.0/azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
- azure_ai_evaluation-1.1.0/azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
- azure_ai_evaluation-1.1.0/azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
- azure_ai_evaluation-1.1.0/azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
- azure_ai_evaluation-1.1.0/azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
- azure_ai_evaluation-1.1.0/tests/__pf_service_isolation.py +0 -28
- azure_ai_evaluation-1.1.0/tests/e2etests/test_evaluate.py +0 -953
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/MANIFEST.in +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/TROUBLESHOOTING.md +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_azure/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_azure/_token_manager.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_common/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_common/_experimental.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_common/constants.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_common/math.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_common/rai_service.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_batch_run/code_client.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_telemetry/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_bleu/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_coherence/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_coherence/_coherence.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_eci/_eci.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_f1_score/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_fluency/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_fluency/_fluency.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_gleu/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_groundedness/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_meteor/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_protected_material/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_qa/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_relevance/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_relevance/_relevance.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_retrieval/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_rouge/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_similarity/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_xpia/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluators/_xpia/xpia.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_http_utils.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_model_configurations.py +0 -0
- {azure_ai_evaluation-1.1.0/azure/ai/evaluation/_vendor → azure_ai_evaluation-1.3.0/azure/ai/evaluation/_safety_evaluation}/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_user_agent.py +0 -0
- {azure_ai_evaluation-1.1.0/azure/ai/evaluation/simulator/_data_sources → azure_ai_evaluation-1.3.0/azure/ai/evaluation/_vendor}/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_vendor/rouge_score/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/py.typed +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_adversarial_scenario.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_constants.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_conversation/constants.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_data_sources/grounding.json +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_direct_attack_simulator.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_helpers/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_model_tools/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_model_tools/_rai_client.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_model_tools/_template_handler.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_model_tools/models.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_tracing.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/simulator/_utils.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure_ai_evaluation.egg-info/dependency_links.txt +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure_ai_evaluation.egg-info/not-zip-safe +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure_ai_evaluation.egg-info/top_level.txt +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/pyproject.toml +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/samples/data/evaluate_test_data.jsonl +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/samples/evaluation_samples_common.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/samples/evaluation_samples_evaluate.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/setup.cfg +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/__openai_patcher.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/__init__.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/custom_evaluators/answer_length_with_aggregation.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/e2etests/target_fn.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_batch_run_context.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_content_safety_defect_rate.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_content_safety_rai_script.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_eval_run.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_evaluate_telemetry.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_evaluators/apology_dag/apology.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_evaluators/slow_eval.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_jailbreak_simulator.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_non_adv_simulator.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_simulator.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_synthetic_conversation_bot.py +0 -0
- {azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/tests/unittests/test_utils.py +0 -0
|
@@ -1,10 +1,81 @@
|
|
|
1
1
|
# Release History
|
|
2
2
|
|
|
3
|
-
## 1.1.0 (2024-12-12)
|
|
3
|
+
## 1.3.0 (2025-02-28)
|
|
4
|
+
|
|
5
|
+
### Breaking Changes
|
|
6
|
+
- Multimodal specific evaluators `ContentSafetyMultimodalEvaluator`, `ViolenceMultimodalEvaluator`, `SexualMultimodalEvaluator`, `SelfHarmMultimodalEvaluator`, `HateUnfairnessMultimodalEvaluator` and `ProtectedMaterialMultimodalEvaluator` have been removed. Please use `ContentSafetyEvaluator`, `ViolenceEvaluator`, `SexualEvaluator`, `SelfHarmEvaluator`, `HateUnfairnessEvaluator` and `ProtectedMaterialEvaluator` instead.
|
|
7
|
+
- Metric name in ProtectedMaterialEvaluator's output is changed from `protected_material.fictional_characters_label` to `protected_material.fictional_characters_defect_rate`. It's now consistent with other evaluators' metric names (ending with `_defect_rate`).
|
|
8
|
+
|
|
9
|
+
## 1.2.0 (2025-01-27)
|
|
10
|
+
|
|
11
|
+
### Features Added
|
|
12
|
+
- CSV files are now supported as data file inputs with `evaluate()` API. The CSV file should have a header row with column names that match the `data` and `target` fields in the `evaluate()` method and the filename should be passed as the `data` parameter. Column name 'Conversation' in CSV file is not fully supported yet.
|
|
13
|
+
|
|
14
|
+
### Breaking Changes
|
|
15
|
+
- `ViolenceMultimodalEvaluator`, `SexualMultimodalEvaluator`, `SelfHarmMultimodalEvaluator`, `HateUnfairnessMultimodalEvaluator` and `ProtectedMaterialMultimodalEvaluator` will be removed in the next release.
|
|
4
16
|
|
|
5
17
|
### Bugs Fixed
|
|
6
18
|
- Removed `[remote]` extra. This is no longer needed when tracking results in Azure AI Studio.
|
|
7
19
|
- Fixed `AttributeError: 'NoneType' object has no attribute 'get'` while running simulator with 1000+ results
|
|
20
|
+
- Fixed the non adversarial simulator to run in task-free mode
|
|
21
|
+
- Content safety evaluators (violence, self harm, sexual, hate/unfairness) return the maximum result as the
|
|
22
|
+
main score when aggregating per-turn evaluations from a conversation into an overall
|
|
23
|
+
evaluation score. Other conversation-capable evaluators still default to a mean for aggregation.
|
|
24
|
+
- Fixed bug in non adversarial simulator sample where `tasks` was undefined
|
|
25
|
+
|
|
26
|
+
### Other Changes
|
|
27
|
+
- Changed minimum required python version to use this package from 3.8 to 3.9
|
|
28
|
+
- Stop dependency on the local promptflow service. No promptflow service will automatically start when running evaluation.
|
|
29
|
+
- Evaluators internally allow for custom aggregation. However, this causes serialization failures if evaluated while the
|
|
30
|
+
environment variable `AI_EVALS_BATCH_USE_ASYNC` is set to false.
|
|
31
|
+
|
|
32
|
+
## 1.1.0 (2024-12-12)
|
|
33
|
+
|
|
34
|
+
### Features Added
|
|
35
|
+
- Added image support in `ContentSafetyEvaluator`, `ViolenceEvaluator`, `SexualEvaluator`, `SelfHarmEvaluator`, `HateUnfairnessEvaluator` and `ProtectedMaterialEvaluator`. Provide image URLs or base64 encoded images in `conversation` input for image evaluation. See below for an example:
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
evaluator = ContentSafetyEvaluator(credential=azure_cred, azure_ai_project=project_scope)
|
|
39
|
+
conversation = {
|
|
40
|
+
"messages": [
|
|
41
|
+
{
|
|
42
|
+
"role": "system",
|
|
43
|
+
"content": [
|
|
44
|
+
{"type": "text", "text": "You are an AI assistant that understands images."}
|
|
45
|
+
],
|
|
46
|
+
},
|
|
47
|
+
{
|
|
48
|
+
"role": "user",
|
|
49
|
+
"content": [
|
|
50
|
+
{"type": "text", "text": "Can you describe this image?"},
|
|
51
|
+
{
|
|
52
|
+
"type": "image_url",
|
|
53
|
+
"image_url": {
|
|
54
|
+
"url": "https://cdn.britannica.com/68/178268-050-5B4E7FB6/Tom-Cruise-2013.jpg"
|
|
55
|
+
},
|
|
56
|
+
},
|
|
57
|
+
],
|
|
58
|
+
},
|
|
59
|
+
{
|
|
60
|
+
"role": "assistant",
|
|
61
|
+
"content": [
|
|
62
|
+
{
|
|
63
|
+
"type": "text",
|
|
64
|
+
"text": "The image shows a man with short brown hair smiling, wearing a dark-colored shirt.",
|
|
65
|
+
}
|
|
66
|
+
],
|
|
67
|
+
},
|
|
68
|
+
]
|
|
69
|
+
}
|
|
70
|
+
print("Calling Content Safety Evaluator for multi-modal")
|
|
71
|
+
score = evaluator(conversation=conversation)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
- Please switch to generic evaluators for image evaluations as mentioned above. `ContentSafetyMultimodalEvaluator`, `ContentSafetyMultimodalEvaluatorBase`, `ViolenceMultimodalEvaluator`, `SexualMultimodalEvaluator`, `SelfHarmMultimodalEvaluator`, `HateUnfairnessMultimodalEvaluator` and `ProtectedMaterialMultimodalEvaluator` will be deprecated in the next release.
|
|
75
|
+
|
|
76
|
+
### Bugs Fixed
|
|
77
|
+
- Removed `[remote]` extra. This is no longer needed when tracking results in Azure AI Foundry portal.
|
|
78
|
+
- Fixed `AttributeError: 'NoneType' object has no attribute 'get'` while running simulator with 1000+ results
|
|
8
79
|
|
|
9
80
|
## 1.0.1 (2024-11-15)
|
|
10
81
|
|
{azure_ai_evaluation-1.1.0/azure_ai_evaluation.egg-info → azure_ai_evaluation-1.3.0}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: azure-ai-evaluation
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.3.0
|
|
4
4
|
Summary: Microsoft Azure Evaluation Library for Python
|
|
5
5
|
Home-page: https://github.com/Azure/azure-sdk-for-python
|
|
6
6
|
Author: Microsoft Corporation
|
|
@@ -13,17 +13,16 @@ Classifier: Development Status :: 5 - Production/Stable
|
|
|
13
13
|
Classifier: Programming Language :: Python
|
|
14
14
|
Classifier: Programming Language :: Python :: 3
|
|
15
15
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
17
16
|
Classifier: Programming Language :: Python :: 3.9
|
|
18
17
|
Classifier: Programming Language :: Python :: 3.10
|
|
19
18
|
Classifier: Programming Language :: Python :: 3.11
|
|
20
19
|
Classifier: License :: OSI Approved :: MIT License
|
|
21
20
|
Classifier: Operating System :: OS Independent
|
|
22
|
-
Requires-Python: >=3.8
|
|
21
|
+
Requires-Python: >=3.9
|
|
23
22
|
Description-Content-Type: text/markdown
|
|
24
23
|
License-File: NOTICE.txt
|
|
25
|
-
Requires-Dist: promptflow-devkit>=1.
|
|
26
|
-
Requires-Dist: promptflow-core>=1.
|
|
24
|
+
Requires-Dist: promptflow-devkit>=1.17.1
|
|
25
|
+
Requires-Dist: promptflow-core>=1.17.1
|
|
27
26
|
Requires-Dist: pyjwt>=2.8.0
|
|
28
27
|
Requires-Dist: azure-identity>=1.16.0
|
|
29
28
|
Requires-Dist: azure-core>=1.30.2
|
|
@@ -54,7 +53,7 @@ Azure AI SDK provides following to evaluate Generative AI Applications:
|
|
|
54
53
|
|
|
55
54
|
### Prerequisites
|
|
56
55
|
|
|
57
|
-
- Python 3.8 or later is required to use this package.
|
|
56
|
+
- Python 3.9 or later is required to use this package.
|
|
58
57
|
- [Optional] You must have [Azure AI Project][ai_project] or [Azure Open AI][azure_openai] to use AI-assisted evaluators
|
|
59
58
|
|
|
60
59
|
### Install the package
|
|
@@ -378,11 +377,82 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
|
|
|
378
377
|
|
|
379
378
|
# Release History
|
|
380
379
|
|
|
381
|
-
## 1.1.0 (2024-12-12)
|
|
380
|
+
## 1.3.0 (2025-02-28)
|
|
381
|
+
|
|
382
|
+
### Breaking Changes
|
|
383
|
+
- Multimodal specific evaluators `ContentSafetyMultimodalEvaluator`, `ViolenceMultimodalEvaluator`, `SexualMultimodalEvaluator`, `SelfHarmMultimodalEvaluator`, `HateUnfairnessMultimodalEvaluator` and `ProtectedMaterialMultimodalEvaluator` have been removed. Please use `ContentSafetyEvaluator`, `ViolenceEvaluator`, `SexualEvaluator`, `SelfHarmEvaluator`, `HateUnfairnessEvaluator` and `ProtectedMaterialEvaluator` instead.
|
|
384
|
+
- Metric name in ProtectedMaterialEvaluator's output is changed from `protected_material.fictional_characters_label` to `protected_material.fictional_characters_defect_rate`. It's now consistent with other evaluators' metric names (ending with `_defect_rate`).
|
|
385
|
+
|
|
386
|
+
## 1.2.0 (2025-01-27)
|
|
387
|
+
|
|
388
|
+
### Features Added
|
|
389
|
+
- CSV files are now supported as data file inputs with `evaluate()` API. The CSV file should have a header row with column names that match the `data` and `target` fields in the `evaluate()` method and the filename should be passed as the `data` parameter. Column name 'Conversation' in CSV file is not fully supported yet.
|
|
390
|
+
|
|
391
|
+
### Breaking Changes
|
|
392
|
+
- `ViolenceMultimodalEvaluator`, `SexualMultimodalEvaluator`, `SelfHarmMultimodalEvaluator`, `HateUnfairnessMultimodalEvaluator` and `ProtectedMaterialMultimodalEvaluator` will be removed in the next release.
|
|
382
393
|
|
|
383
394
|
### Bugs Fixed
|
|
384
395
|
- Removed `[remote]` extra. This is no longer needed when tracking results in Azure AI Studio.
|
|
385
396
|
- Fixed `AttributeError: 'NoneType' object has no attribute 'get'` while running simulator with 1000+ results
|
|
397
|
+
- Fixed the non adversarial simulator to run in task-free mode
|
|
398
|
+
- Content safety evaluators (violence, self harm, sexual, hate/unfairness) return the maximum result as the
|
|
399
|
+
main score when aggregating per-turn evaluations from a conversation into an overall
|
|
400
|
+
evaluation score. Other conversation-capable evaluators still default to a mean for aggregation.
|
|
401
|
+
- Fixed bug in non adversarial simulator sample where `tasks` was undefined
|
|
402
|
+
|
|
403
|
+
### Other Changes
|
|
404
|
+
- Changed minimum required python version to use this package from 3.8 to 3.9
|
|
405
|
+
- Stop dependency on the local promptflow service. No promptflow service will automatically start when running evaluation.
|
|
406
|
+
- Evaluators internally allow for custom aggregation. However, this causes serialization failures if evaluated while the
|
|
407
|
+
environment variable `AI_EVALS_BATCH_USE_ASYNC` is set to false.
|
|
408
|
+
|
|
409
|
+
## 1.1.0 (2024-12-12)
|
|
410
|
+
|
|
411
|
+
### Features Added
|
|
412
|
+
- Added image support in `ContentSafetyEvaluator`, `ViolenceEvaluator`, `SexualEvaluator`, `SelfHarmEvaluator`, `HateUnfairnessEvaluator` and `ProtectedMaterialEvaluator`. Provide image URLs or base64 encoded images in `conversation` input for image evaluation. See below for an example:
|
|
413
|
+
|
|
414
|
+
```python
|
|
415
|
+
evaluator = ContentSafetyEvaluator(credential=azure_cred, azure_ai_project=project_scope)
|
|
416
|
+
conversation = {
|
|
417
|
+
"messages": [
|
|
418
|
+
{
|
|
419
|
+
"role": "system",
|
|
420
|
+
"content": [
|
|
421
|
+
{"type": "text", "text": "You are an AI assistant that understands images."}
|
|
422
|
+
],
|
|
423
|
+
},
|
|
424
|
+
{
|
|
425
|
+
"role": "user",
|
|
426
|
+
"content": [
|
|
427
|
+
{"type": "text", "text": "Can you describe this image?"},
|
|
428
|
+
{
|
|
429
|
+
"type": "image_url",
|
|
430
|
+
"image_url": {
|
|
431
|
+
"url": "https://cdn.britannica.com/68/178268-050-5B4E7FB6/Tom-Cruise-2013.jpg"
|
|
432
|
+
},
|
|
433
|
+
},
|
|
434
|
+
],
|
|
435
|
+
},
|
|
436
|
+
{
|
|
437
|
+
"role": "assistant",
|
|
438
|
+
"content": [
|
|
439
|
+
{
|
|
440
|
+
"type": "text",
|
|
441
|
+
"text": "The image shows a man with short brown hair smiling, wearing a dark-colored shirt.",
|
|
442
|
+
}
|
|
443
|
+
],
|
|
444
|
+
},
|
|
445
|
+
]
|
|
446
|
+
}
|
|
447
|
+
print("Calling Content Safety Evaluator for multi-modal")
|
|
448
|
+
score = evaluator(conversation=conversation)
|
|
449
|
+
```
|
|
450
|
+
|
|
451
|
+
- Please switch to generic evaluators for image evaluations as mentioned above. `ContentSafetyMultimodalEvaluator`, `ContentSafetyMultimodalEvaluatorBase`, `ViolenceMultimodalEvaluator`, `SexualMultimodalEvaluator`, `SelfHarmMultimodalEvaluator`, `HateUnfairnessMultimodalEvaluator` and `ProtectedMaterialMultimodalEvaluator` will be deprecated in the next release.
|
|
452
|
+
|
|
453
|
+
### Bugs Fixed
|
|
454
|
+
- Removed `[remote]` extra. This is no longer needed when tracking results in Azure AI Foundry portal.
|
|
455
|
+
- Fixed `AttributeError: 'NoneType' object has no attribute 'get'` while running simulator with 1000+ results
|
|
386
456
|
|
|
387
457
|
## 1.0.1 (2024-11-15)
|
|
388
458
|
|
|
@@ -22,7 +22,7 @@ Azure AI SDK provides following to evaluate Generative AI Applications:
|
|
|
22
22
|
|
|
23
23
|
### Prerequisites
|
|
24
24
|
|
|
25
|
-
- Python 3.
|
|
25
|
+
- Python 3.9 or later is required to use this package.
|
|
26
26
|
- [Optional] An [Azure AI Project][ai_project] or [Azure OpenAI][azure_openai] resource is needed only if you want to use AI-assisted evaluators.
|
|
27
27
|
|
|
28
28
|
### Install the package
|
|
@@ -12,14 +12,6 @@ from ._evaluators._content_safety import (
|
|
|
12
12
|
SexualEvaluator,
|
|
13
13
|
ViolenceEvaluator,
|
|
14
14
|
)
|
|
15
|
-
from ._evaluators._multimodal._content_safety_multimodal import (
|
|
16
|
-
ContentSafetyMultimodalEvaluator,
|
|
17
|
-
HateUnfairnessMultimodalEvaluator,
|
|
18
|
-
SelfHarmMultimodalEvaluator,
|
|
19
|
-
SexualMultimodalEvaluator,
|
|
20
|
-
ViolenceMultimodalEvaluator,
|
|
21
|
-
)
|
|
22
|
-
from ._evaluators._multimodal._protected_material import ProtectedMaterialMultimodalEvaluator
|
|
23
15
|
from ._evaluators._f1_score import F1ScoreEvaluator
|
|
24
16
|
from ._evaluators._fluency import FluencyEvaluator
|
|
25
17
|
from ._evaluators._gleu import GleuScoreEvaluator
|
|
@@ -72,11 +64,5 @@ __all__ = [
|
|
|
72
64
|
"EvaluatorConfig",
|
|
73
65
|
"Conversation",
|
|
74
66
|
"Message",
|
|
75
|
-
"EvaluationResult"
|
|
76
|
-
"ContentSafetyMultimodalEvaluator",
|
|
77
|
-
"HateUnfairnessMultimodalEvaluator",
|
|
78
|
-
"SelfHarmMultimodalEvaluator",
|
|
79
|
-
"SexualMultimodalEvaluator",
|
|
80
|
-
"ViolenceMultimodalEvaluator",
|
|
81
|
-
"ProtectedMaterialMultimodalEvaluator",
|
|
67
|
+
"EvaluationResult"
|
|
82
68
|
]
|
{azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_azure/_clients.py
RENAMED
|
@@ -17,7 +17,7 @@ from azure.ai.evaluation.simulator._model_tools._identity_manager import TokenSc
|
|
|
17
17
|
from ._models import BlobStoreInfo, Workspace
|
|
18
18
|
|
|
19
19
|
|
|
20
|
-
API_VERSION: Final[str] = "2024-
|
|
20
|
+
API_VERSION: Final[str] = "2024-07-01-preview"
|
|
21
21
|
QUERY_KEY_API_VERSION: Final[str] = "api-version"
|
|
22
22
|
PATH_ML_WORKSPACES = ("providers", "Microsoft.MachineLearningServices", "workspaces")
|
|
23
23
|
|
|
@@ -69,7 +69,9 @@ class LiteMLClient:
|
|
|
69
69
|
self._get_token_manager()
|
|
70
70
|
return cast(TokenCredential, self._credential)
|
|
71
71
|
|
|
72
|
-
def workspace_get_default_datastore(
|
|
72
|
+
def workspace_get_default_datastore(
|
|
73
|
+
self, workspace_name: str, *, include_credentials: bool = False, **kwargs: Any
|
|
74
|
+
) -> BlobStoreInfo:
|
|
73
75
|
# 1. Get the default blob store
|
|
74
76
|
# REST API documentation:
|
|
75
77
|
# https://learn.microsoft.com/rest/api/azureml/datastores/list?view=rest-azureml-2024-10-01
|
|
@@ -92,18 +94,29 @@ class LiteMLClient:
|
|
|
92
94
|
account_name = props_json["accountName"]
|
|
93
95
|
endpoint = props_json["endpoint"]
|
|
94
96
|
container_name = props_json["containerName"]
|
|
97
|
+
credential_type = props_json.get("credentials", {}).get("credentialsType")
|
|
95
98
|
|
|
96
99
|
# 2. Get the SAS token to use for accessing the blob store
|
|
97
100
|
# REST API documentation:
|
|
98
101
|
# https://learn.microsoft.com/rest/api/azureml/datastores/list-secrets?view=rest-azureml-2024-10-01
|
|
99
|
-
blob_store_credential: Optional[Union[AzureSasCredential, str]]
|
|
100
|
-
if include_credentials:
|
|
102
|
+
blob_store_credential: Optional[Union[AzureSasCredential, TokenCredential, str]]
|
|
103
|
+
if not include_credentials:
|
|
104
|
+
blob_store_credential = None
|
|
105
|
+
elif credential_type and credential_type.lower() == "none":
|
|
106
|
+
# If storage account key access is disabled, and only Microsoft Entra ID authentication is available,
|
|
107
|
+
# the credentialsType will be "None" and we should not attempt to get the secrets.
|
|
108
|
+
blob_store_credential = self.get_credential()
|
|
109
|
+
else:
|
|
101
110
|
url = self._generate_path(
|
|
102
111
|
*PATH_ML_WORKSPACES, workspace_name, "datastores", "workspaceblobstore", "listSecrets"
|
|
103
112
|
)
|
|
104
113
|
secrets_response = self._http_client.request(
|
|
105
114
|
method="POST",
|
|
106
115
|
url=url,
|
|
116
|
+
json={
|
|
117
|
+
"expirableSecret": True,
|
|
118
|
+
"expireAfterHours": int(kwargs.get("key_expiration_hours", 1)),
|
|
119
|
+
},
|
|
107
120
|
params={
|
|
108
121
|
QUERY_KEY_API_VERSION: self._api_version,
|
|
109
122
|
},
|
|
@@ -114,10 +127,13 @@ class LiteMLClient:
|
|
|
114
127
|
secrets_json = secrets_response.json()
|
|
115
128
|
secrets_type = secrets_json["secretsType"].lower()
|
|
116
129
|
|
|
130
|
+
# As per this website, only SAS tokens, access tokens, or Entra IDs are valid for accessing blob data
|
|
131
|
+
# stores:
|
|
132
|
+
# https://learn.microsoft.com/rest/api/storageservices/authorize-requests-to-azure-storage.
|
|
117
133
|
if secrets_type == "sas":
|
|
118
134
|
blob_store_credential = AzureSasCredential(secrets_json["sasToken"])
|
|
119
135
|
elif secrets_type == "accountkey":
|
|
120
|
-
# To support
|
|
136
|
+
# To support older versions of azure-storage-blob better, we return a string here instead of
|
|
121
137
|
# an AzureNamedKeyCredential
|
|
122
138
|
blob_store_credential = secrets_json["key"]
|
|
123
139
|
else:
|
|
@@ -164,19 +180,19 @@ class LiteMLClient:
|
|
|
164
180
|
# nothing to see here, move along
|
|
165
181
|
return
|
|
166
182
|
|
|
167
|
-
|
|
183
|
+
message = f"The {description} request failed with HTTP {response.status_code}"
|
|
168
184
|
try:
|
|
169
185
|
error_json = response.json()["error"]
|
|
170
186
|
additional_info = f"({error_json['code']}) {error_json['message']}"
|
|
187
|
+
message += f" - {additional_info}"
|
|
171
188
|
except (JSONDecodeError, ValueError, KeyError):
|
|
172
189
|
pass
|
|
173
190
|
|
|
174
191
|
raise EvaluationException(
|
|
175
|
-
message=
|
|
192
|
+
message=message,
|
|
176
193
|
target=ErrorTarget.EVALUATE,
|
|
177
194
|
category=ErrorCategory.FAILED_EXECUTION,
|
|
178
195
|
blame=ErrorBlame.SYSTEM_ERROR,
|
|
179
|
-
internal_message=additional_info,
|
|
180
196
|
)
|
|
181
197
|
|
|
182
198
|
def _generate_path(self, *paths: str) -> str:
|
{azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_azure/_models.py
RENAMED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
|
|
9
9
|
from typing import Dict, List, NamedTuple, Optional, Union
|
|
10
10
|
from msrest.serialization import Model
|
|
11
|
-
from azure.core.credentials import AzureSasCredential
|
|
11
|
+
from azure.core.credentials import AzureSasCredential, TokenCredential
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class BlobStoreInfo(NamedTuple):
|
|
@@ -16,7 +16,7 @@ class BlobStoreInfo(NamedTuple):
|
|
|
16
16
|
account_name: str
|
|
17
17
|
endpoint: str
|
|
18
18
|
container_name: str
|
|
19
|
-
credential: Optional[Union[AzureSasCredential, str]]
|
|
19
|
+
credential: Optional[Union[AzureSasCredential, TokenCredential, str]]
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
class WorkspaceHubConfig(Model):
|
{azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_common/utils.py
RENAMED
|
@@ -366,7 +366,7 @@ def validate_conversation(conversation):
|
|
|
366
366
|
if not isinstance(messages, list):
|
|
367
367
|
raise_exception(
|
|
368
368
|
"'messages' parameter must be a JSON-compatible list of chat messages",
|
|
369
|
-
ErrorTarget.
|
|
369
|
+
ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
|
|
370
370
|
)
|
|
371
371
|
expected_roles = {"user", "assistant", "system"}
|
|
372
372
|
image_found = False
|
|
@@ -393,7 +393,7 @@ def validate_conversation(conversation):
|
|
|
393
393
|
):
|
|
394
394
|
raise_exception(
|
|
395
395
|
f"Messages must be a strongly typed class of ChatRequestMessage. Message number: {num}",
|
|
396
|
-
ErrorTarget.
|
|
396
|
+
ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
|
|
397
397
|
)
|
|
398
398
|
if isinstance(message, AssistantMessage):
|
|
399
399
|
assistant_message_count += 1
|
|
@@ -407,7 +407,7 @@ def validate_conversation(conversation):
|
|
|
407
407
|
if message.get("role") not in expected_roles:
|
|
408
408
|
raise_exception(
|
|
409
409
|
f"Invalid role provided: {message.get('role')}. Message number: {num}",
|
|
410
|
-
ErrorTarget.
|
|
410
|
+
ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
|
|
411
411
|
)
|
|
412
412
|
if message.get("role") == "assistant":
|
|
413
413
|
assistant_message_count += 1
|
|
@@ -417,7 +417,7 @@ def validate_conversation(conversation):
|
|
|
417
417
|
if not isinstance(content, (str, list)):
|
|
418
418
|
raise_exception(
|
|
419
419
|
f"Content in each turn must be a string or array. Message number: {num}",
|
|
420
|
-
ErrorTarget.
|
|
420
|
+
ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
|
|
421
421
|
)
|
|
422
422
|
if isinstance(content, list):
|
|
423
423
|
if any(item.get("type") == "image_url" and "url" in item.get("image_url", {}) for item in content):
|
|
@@ -425,21 +425,21 @@ def validate_conversation(conversation):
|
|
|
425
425
|
if not image_found:
|
|
426
426
|
raise_exception(
|
|
427
427
|
"Message needs to have multi-modal input like images.",
|
|
428
|
-
ErrorTarget.
|
|
428
|
+
ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
|
|
429
429
|
)
|
|
430
430
|
if assistant_message_count == 0:
|
|
431
431
|
raise_exception(
|
|
432
432
|
"Assistant role required in one of the messages.",
|
|
433
|
-
ErrorTarget.
|
|
433
|
+
ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
|
|
434
434
|
)
|
|
435
435
|
if user_message_count == 0:
|
|
436
436
|
raise_exception(
|
|
437
437
|
"User role required in one of the messages.",
|
|
438
|
-
ErrorTarget.
|
|
438
|
+
ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
|
|
439
439
|
)
|
|
440
440
|
if assistant_message_count > 1:
|
|
441
441
|
raise_exception(
|
|
442
442
|
"Evaluators for multimodal conversations only support single turn. "
|
|
443
443
|
"User and assistant role expected as the only role in each message.",
|
|
444
|
-
ErrorTarget.
|
|
444
|
+
ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
|
|
445
445
|
)
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
# ---------------------------------------------------------
|
|
2
2
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
3
|
# ---------------------------------------------------------
|
|
4
|
+
import enum
|
|
4
5
|
from typing import Literal
|
|
6
|
+
from azure.ai.evaluation._common._experimental import experimental
|
|
5
7
|
|
|
6
8
|
|
|
7
9
|
class EvaluationMetrics:
|
|
@@ -20,6 +22,9 @@ class EvaluationMetrics:
|
|
|
20
22
|
SELF_HARM = "self_harm"
|
|
21
23
|
SEXUAL = "sexual"
|
|
22
24
|
PROTECTED_MATERIAL = "protected_material"
|
|
25
|
+
ARTWORK = "artwork"
|
|
26
|
+
FICTIONAL_CHARACTERS = "fictional_characters"
|
|
27
|
+
LOGOS_AND_BRANDS = "logos_and_brands"
|
|
23
28
|
XPIA = "xpia"
|
|
24
29
|
|
|
25
30
|
|
|
@@ -57,6 +62,22 @@ class EvaluationRunProperties:
|
|
|
57
62
|
EVALUATION_SDK = "_azureml.evaluation_sdk_name"
|
|
58
63
|
|
|
59
64
|
|
|
65
|
+
@experimental
|
|
66
|
+
class _AggregationType(enum.Enum):
|
|
67
|
+
"""Defines how numeric evaluation results should be aggregated
|
|
68
|
+
to produce a single value. Used by individual evaluators to combine per-turn results for
|
|
69
|
+
a conversation-based input. In general, wherever this enum is used, it is also possible
|
|
70
|
+
to directly assign the underlying aggregation function for more complex use cases.
|
|
71
|
+
The 'custom' value is generally not an acceptable input, and should only be used as an output
|
|
72
|
+
to indicate that a custom aggregation function has been injected."""
|
|
73
|
+
|
|
74
|
+
MEAN = "mean"
|
|
75
|
+
MAX = "max"
|
|
76
|
+
MIN = "min"
|
|
77
|
+
SUM = "sum"
|
|
78
|
+
CUSTOM = "custom"
|
|
79
|
+
|
|
80
|
+
|
|
60
81
|
DEFAULT_EVALUATION_RESULTS_FILE_NAME = "evaluation_results.json"
|
|
61
82
|
|
|
62
83
|
CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT = 4
|
|
@@ -5,5 +5,6 @@ from .eval_run_context import EvalRunContext
|
|
|
5
5
|
from .code_client import CodeClient
|
|
6
6
|
from .proxy_client import ProxyClient
|
|
7
7
|
from .target_run_context import TargetRunContext
|
|
8
|
+
from .proxy_client import ProxyRun
|
|
8
9
|
|
|
9
|
-
__all__ = ["CodeClient", "ProxyClient", "EvalRunContext", "TargetRunContext"]
|
|
10
|
+
__all__ = ["CodeClient", "ProxyClient", "EvalRunContext", "TargetRunContext", "ProxyRun"]
|
{azure_ai_evaluation-1.1.0 → azure_ai_evaluation-1.3.0}/azure/ai/evaluation/_evaluate/_eval_run.py
RENAMED
|
@@ -421,7 +421,9 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
|
|
|
421
421
|
local_paths.append(local_file_path)
|
|
422
422
|
|
|
423
423
|
# We will write the artifacts to the workspaceblobstore
|
|
424
|
-
datastore = self._management_client.workspace_get_default_datastore(
|
|
424
|
+
datastore = self._management_client.workspace_get_default_datastore(
|
|
425
|
+
self._workspace_name, include_credentials=True
|
|
426
|
+
)
|
|
425
427
|
account_url = f"{datastore.account_name}.blob.{datastore.endpoint}"
|
|
426
428
|
|
|
427
429
|
svc_client = BlobServiceClient(account_url=account_url, credential=datastore.credential)
|