azure-ai-evaluation 1.0.0b3__tar.gz → 1.0.0b4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic.
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/CHANGELOG.md +37 -3
- azure_ai_evaluation-1.0.0b4/NOTICE.txt +50 -0
- {azure_ai_evaluation-1.0.0b3/azure_ai_evaluation.egg-info → azure_ai_evaluation-1.0.0b4}/PKG-INFO +72 -44
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/README.md +30 -34
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_common/constants.py +4 -2
- azure_ai_evaluation-1.0.0b4/azure/ai/evaluation/_common/math.py +18 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_common/rai_service.py +54 -62
- azure_ai_evaluation-1.0.0b4/azure/ai/evaluation/_common/utils.py +272 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_constants.py +10 -2
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py +10 -3
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py +33 -17
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +17 -2
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluate/_eval_run.py +26 -10
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluate/_evaluate.py +116 -62
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluate/_telemetry/__init__.py +16 -17
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluate/_utils.py +44 -25
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_coherence/_coherence.py +3 -2
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_common/_base_eval.py +59 -30
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +10 -13
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +18 -20
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +15 -20
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +63 -42
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +4 -4
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +4 -4
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +4 -4
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_content_safety/_violence.py +4 -4
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_eci/_eci.py +4 -4
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +14 -6
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_fluency/_fluency.py +3 -2
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +3 -2
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +4 -4
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_qa/_qa.py +4 -3
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_relevance/_relevance.py +3 -2
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +11 -8
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_rouge/_rouge.py +1 -1
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_similarity/_similarity.py +21 -7
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_xpia/xpia.py +4 -5
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_exceptions.py +9 -6
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_http_utils.py +203 -132
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_model_configurations.py +5 -5
- azure_ai_evaluation-1.0.0b4/azure/ai/evaluation/_vendor/__init__.py +3 -0
- azure_ai_evaluation-1.0.0b4/azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
- azure_ai_evaluation-1.0.0b4/azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +328 -0
- azure_ai_evaluation-1.0.0b4/azure/ai/evaluation/_vendor/rouge_score/scoring.py +63 -0
- azure_ai_evaluation-1.0.0b4/azure/ai/evaluation/_vendor/rouge_score/tokenize.py +63 -0
- azure_ai_evaluation-1.0.0b4/azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_version.py +1 -1
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_adversarial_simulator.py +85 -60
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_conversation/__init__.py +13 -12
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_conversation/_conversation.py +4 -4
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_direct_attack_simulator.py +24 -66
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_helpers/_experimental.py +20 -9
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +4 -4
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +22 -64
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +67 -21
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +28 -11
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_model_tools/_template_handler.py +68 -24
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_model_tools/models.py +10 -10
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +0 -5
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +0 -4
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_simulator.py +112 -113
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_tracing.py +4 -4
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4/azure_ai_evaluation.egg-info}/PKG-INFO +72 -44
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure_ai_evaluation.egg-info/SOURCES.txt +8 -0
- azure_ai_evaluation-1.0.0b4/azure_ai_evaluation.egg-info/requires.txt +9 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/pyproject.toml +1 -2
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/setup.py +3 -5
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/e2etests/custom_evaluators/answer_length_with_aggregation.py +9 -2
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/e2etests/test_adv_simulator.py +51 -24
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/e2etests/test_builtin_evaluators.py +16 -16
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/e2etests/test_evaluate.py +12 -8
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/e2etests/test_sim_and_eval.py +2 -3
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_content_safety_rai_script.py +11 -11
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_eval_run.py +5 -2
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_evaluate.py +4 -4
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_evaluate_telemetry.py +10 -9
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_jailbreak_simulator.py +4 -3
- azure_ai_evaluation-1.0.0b4/tests/unittests/test_non_adv_simulator.py +359 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_simulator.py +4 -5
- azure_ai_evaluation-1.0.0b3/azure/ai/evaluation/_common/utils.py +0 -102
- azure_ai_evaluation-1.0.0b3/azure_ai_evaluation.egg-info/requires.txt +0 -16
- azure_ai_evaluation-1.0.0b3/tests/unittests/test_non_adv_simulator.py +0 -129
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/MANIFEST.in +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/__init__.py +1 -1
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_common/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluate/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluate/_batch_run_client/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_bleu/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_bleu/_bleu.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_coherence/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_common/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_f1_score/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_fluency/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_gleu/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_gleu/_gleu.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_groundedness/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_meteor/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_meteor/_meteor.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_protected_material/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_qa/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_relevance/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_retrieval/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_rouge/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_similarity/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_evaluators/_xpia/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_user_agent.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/py.typed +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_adversarial_scenario.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_constants.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_conversation/constants.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_helpers/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_model_tools/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_model_tools/_rai_client.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/simulator/_utils.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure_ai_evaluation.egg-info/dependency_links.txt +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure_ai_evaluation.egg-info/not-zip-safe +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure_ai_evaluation.egg-info/top_level.txt +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/setup.cfg +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/__openai_patcher.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/conftest.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/e2etests/__init__.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/e2etests/target_fn.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/e2etests/test_metrics_upload.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_batch_run_context.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_built_in_evaluator.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_content_safety_defect_rate.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_evaluators/apology_dag/apology.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_evaluators/test_inputs_evaluators.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_save_eval.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_synthetic_callback_conv_bot.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_synthetic_conversation_bot.py +0 -0
- {azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/tests/unittests/test_utils.py +0 -0
{azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/CHANGELOG.md
RENAMED
@@ -1,5 +1,19 @@
 # Release History
 
+## 1.0.0b4 (2024-10-16)
+
+### Breaking Changes
+
+- Removed `numpy` dependency. All NaN values returned by the SDK have been changed from `numpy.nan` to `math.nan`.
+- `credential` is now required to be passed in for all content safety evaluators and `ProtectedMaterialsEvaluator`. `DefaultAzureCredential` will no longer be chosen if a credential is not passed.
+- Changed package extra name from "pf-azure" to "remote".
+
+### Bugs Fixed
+- Adversarial Conversation simulations would fail with `Forbidden`. Added logic to re-fetch the token in the exponential retry logic to retrieve the RAI Service response.
+
+### Other Changes
+- Enhanced the error message to provide clearer instructions when required packages for the remote tracking feature are missing.
+
 ## 1.0.0b3 (2024-10-01)
 
 ### Features Added
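To make the `credential` breaking change above concrete, here is a minimal sketch of constructing a content safety evaluator in 1.0.0b4; the project values are placeholders, and the exact keyword names should be checked against the SDK reference:

```python
import os

from azure.identity import DefaultAzureCredential
from azure.ai.evaluation import ViolenceEvaluator

azure_ai_project = {
    "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"),
    "resource_group_name": os.environ.get("RESOURCE_GROUP"),
    "project_name": os.environ.get("PROJECT_NAME"),
}

# As of 1.0.0b4 the credential must be passed explicitly; the SDK no longer
# falls back to DefaultAzureCredential when none is provided.
violence_eval = ViolenceEvaluator(
    credential=DefaultAzureCredential(),
    azure_ai_project=azure_ai_project,
)
result = violence_eval(query="What is the capital of France?", response="Paris.")
```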
@@ -54,9 +68,29 @@ evaluate(
 )
 ```
 
+- Simulator now requires a model configuration to call the prompty instead of an Azure AI project scope. This enables the use of the simulator with Entra ID based auth.
+Before:
+```python
+azure_ai_project = {
+    "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"),
+    "resource_group_name": os.environ.get("RESOURCE_GROUP"),
+    "project_name": os.environ.get("PROJECT_NAME"),
+}
+sim = Simulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
+```
+After:
+```python
+model_config = {
+    "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"),
+    "azure_deployment": os.environ.get("AZURE_DEPLOYMENT"),
+}
+sim = Simulator(model_config=model_config)
+```
+If `api_key` is not included in the `model_config`, the prompty runtime in `promptflow-core` will pick up `DefaultAzureCredential`.
+
 ### Bugs Fixed
 
-- Fixed issue where Entra ID authentication was not working with `AzureOpenAIModelConfiguration`
+- Fixed issue where Entra ID authentication was not working with `AzureOpenAIModelConfiguration`
 
 ## 1.0.0b2 (2024-09-24)
 
@@ -69,9 +103,9 @@ evaluate(
 ### Breaking Changes
 
 - The `synthetic` namespace has been renamed to `simulator`, and sub-namespaces under this module have been removed
-- The `evaluate` and `evaluators` namespaces have been removed, and everything previously exposed in those modules has been added to the root namespace `azure.ai.evaluation`
+- The `evaluate` and `evaluators` namespaces have been removed, and everything previously exposed in those modules has been added to the root namespace `azure.ai.evaluation`
 - The parameter name `project_scope` in content safety evaluators has been renamed to `azure_ai_project` for consistency with the evaluate API and simulators.
-- Model configuration classes are now of type `TypedDict` and are exposed in the `azure.ai.evaluation` module instead of coming from `promptflow.core`.
+- Model configuration classes are now of type `TypedDict` and are exposed in the `azure.ai.evaluation` module instead of coming from `promptflow.core`.
 - Updated the parameter names for `question` and `answer` in built-in evaluators to more generic terms: `query` and `response`.
 
 ### Features Added
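As a concrete illustration of the `query`/`response` rename noted in the 1.0.0b2 breaking changes above, a prompt-based evaluator call now looks roughly like this sketch (the output key shown is an assumption and may differ by version):

```python
import os

from azure.ai.evaluation import CoherenceEvaluator

model_config = {
    "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"),
    "azure_deployment": os.environ.get("AZURE_DEPLOYMENT"),
}

coherence_eval = CoherenceEvaluator(model_config=model_config)

# Formerly question=/answer=; since 1.0.0b2 the inputs are query=/response=
result = coherence_eval(
    query="What is the capital of Japan?",
    response="The capital of Japan is Tokyo.",
)
print(result)  # e.g. {"gpt_coherence": 5.0}
```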
azure_ai_evaluation-1.0.0b4/NOTICE.txt
ADDED
@@ -0,0 +1,50 @@
+NOTICES AND INFORMATION
+Do Not Translate or Localize
+
+This software incorporates material from third parties.
+Microsoft makes certain open source code available at https://3rdpartysource.microsoft.com,
+or you may send a check or money order for US $5.00, including the product name,
+the open source component name, platform, and version number, to:
+
+Source Code Compliance Team
+Microsoft Corporation
+One Microsoft Way
+Redmond, WA 98052
+USA
+
+Notwithstanding any other terms, you may reverse engineer this software to the extent
+required to debug changes to any libraries licensed under the GNU Lesser General Public License.
+
+License notice for nltk
+---------------------------------------------------------
+
+Copyright 2024 The NLTK Project
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+License notice for rouge-score
+---------------------------------------------------------
+
+Copyright 2024 The Google Research Authors
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
{azure_ai_evaluation-1.0.0b3/azure_ai_evaluation.egg-info → azure_ai_evaluation-1.0.0b4}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: azure-ai-evaluation
-Version: 1.0.0b3
+Version: 1.0.0b4
 Summary: Microsoft Azure Evaluation Library for Python
 Home-page: https://github.com/Azure/azure-sdk-for-python
 Author: Microsoft Corporation
@@ -21,17 +21,15 @@ Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
+License-File: NOTICE.txt
 Requires-Dist: promptflow-devkit>=1.15.0
 Requires-Dist: promptflow-core>=1.15.0
-Requires-Dist: numpy>=1.23.2; python_version < "3.12"
-Requires-Dist: numpy>=1.26.4; python_version >= "3.12"
 Requires-Dist: pyjwt>=2.8.0
-Requires-Dist: azure-identity>=1.
+Requires-Dist: azure-identity>=1.16.0
 Requires-Dist: azure-core>=1.30.2
 Requires-Dist: nltk>=3.9.1
-
-
-Requires-Dist: promptflow-azure<2.0.0,>=1.15.0; extra == "pf-azure"
+Provides-Extra: remote
+Requires-Dist: promptflow-azure<2.0.0,>=1.15.0; extra == "remote"
 
 # Azure AI Evaluation client library for Python
 
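A practical consequence of the metadata change above: installs that previously pulled in remote tracking support with `pip install azure-ai-evaluation[pf-azure]` would now use `pip install azure-ai-evaluation[remote]`, which installs the same `promptflow-azure` dependency under the new extra name.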
@@ -154,11 +152,6 @@ name: ApplicationPrompty
 description: Simulates an application
 model:
   api: chat
-  configuration:
-    type: azure_openai
-    azure_deployment: ${env:AZURE_DEPLOYMENT}
-    api_key: ${env:AZURE_OPENAI_API_KEY}
-    azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT}
   parameters:
     temperature: 0.0
     top_p: 1.0
@@ -187,52 +180,55 @@ import asyncio
 from typing import Any, Dict, List, Optional
 from azure.ai.evaluation.simulator import Simulator
 from promptflow.client import load_flow
-from azure.identity import DefaultAzureCredential
 import os
+import wikipedia
 
-
-
-"
-"
+# Set up the model configuration without api_key, using DefaultAzureCredential
+model_config = {
+    "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"),
+    "azure_deployment": os.environ.get("AZURE_DEPLOYMENT"),
+    # not providing key would make the SDK pick up `DefaultAzureCredential`
+    # use "api_key": "<your API key>"
 }
 
-
-wiki_search_term = "Leonardo da
+# Use Wikipedia to get some text for the simulation
+wiki_search_term = "Leonardo da Vinci"
 wiki_title = wikipedia.search(wiki_search_term)[0]
 wiki_page = wikipedia.page(wiki_title)
 text = wiki_page.summary[:1000]
 
-def method_to_invoke_application_prompty(query: str):
+def method_to_invoke_application_prompty(query: str, messages_list: List[Dict], context: Optional[Dict]):
     try:
         current_dir = os.path.dirname(__file__)
         prompty_path = os.path.join(current_dir, "application.prompty")
-        _flow = load_flow(
-
-
+        _flow = load_flow(
+            source=prompty_path,
+            model=model_config,
+            credential=DefaultAzureCredential()
+        )
         response = _flow(
             query=query,
             context=context,
             conversation_history=messages_list
         )
         return response
-    except:
-        print("Something went wrong invoking the prompty")
+    except Exception as e:
+        print(f"Something went wrong invoking the prompty: {e}")
         return "something went wrong"
 
 async def callback(
-    messages: List[Dict],
+    messages: Dict[str, List[Dict]],
     stream: bool = False,
     session_state: Any = None,  # noqa: ANN401
     context: Optional[Dict[str, Any]] = None,
 ) -> dict:
     messages_list = messages["messages"]
-    #
+    # Get the last message from the user
     latest_message = messages_list[-1]
     query = latest_message["content"]
-
-
-    response
-    # we are formatting the response to follow the openAI chat protocol format
+    # Call your endpoint or AI application here
+    response = method_to_invoke_application_prompty(query, messages_list, context)
+    # Format the response to follow the OpenAI chat protocol format
     formatted_response = {
         "content": response,
         "role": "assistant",
@@ -243,10 +239,8 @@ async def callback(
     messages["messages"].append(formatted_response)
     return {"messages": messages["messages"], "stream": stream, "session_state": session_state, "context": context}
 
-
-
 async def main():
-    simulator = Simulator(
+    simulator = Simulator(model_config=model_config)
     outputs = await simulator(
         target=callback,
         text=text,
@@ -257,17 +251,17 @@ async def main():
             f"I am a teacher and I want to teach my students about {wiki_search_term}"
         ],
     )
-    print(json.dumps(outputs))
+    print(json.dumps(outputs, indent=2))
 
 if __name__ == "__main__":
-
-
-
-    os.environ["
-    os.environ["
-    os.environ["AZURE_DEPLOYMENT"] = ""
+    # Ensure that the following environment variables are set in your environment:
+    # AZURE_OPENAI_ENDPOINT and AZURE_DEPLOYMENT
+    # Example:
+    # os.environ["AZURE_OPENAI_ENDPOINT"] = "https://your-endpoint.openai.azure.com/"
+    # os.environ["AZURE_DEPLOYMENT"] = "your-deployment-name"
     asyncio.run(main())
     print("done!")
+
 ```
 
 #### Adversarial Simulator
@@ -426,6 +420,20 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
 
 # Release History
 
+## 1.0.0b4 (2024-10-16)
+
+### Breaking Changes
+
+- Removed `numpy` dependency. All NaN values returned by the SDK have been changed from `numpy.nan` to `math.nan`.
+- `credential` is now required to be passed in for all content safety evaluators and `ProtectedMaterialsEvaluator`. `DefaultAzureCredential` will no longer be chosen if a credential is not passed.
+- Changed package extra name from "pf-azure" to "remote".
+
+### Bugs Fixed
+- Adversarial Conversation simulations would fail with `Forbidden`. Added logic to re-fetch the token in the exponential retry logic to retrieve the RAI Service response.
+
+### Other Changes
+- Enhanced the error message to provide clearer instructions when required packages for the remote tracking feature are missing.
+
 ## 1.0.0b3 (2024-10-01)
 
 ### Features Added
@@ -480,9 +488,29 @@ evaluate(
 )
 ```
 
+- Simulator now requires a model configuration to call the prompty instead of an Azure AI project scope. This enables the use of the simulator with Entra ID based auth.
+Before:
+```python
+azure_ai_project = {
+    "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"),
+    "resource_group_name": os.environ.get("RESOURCE_GROUP"),
+    "project_name": os.environ.get("PROJECT_NAME"),
+}
+sim = Simulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
+```
+After:
+```python
+model_config = {
+    "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"),
+    "azure_deployment": os.environ.get("AZURE_DEPLOYMENT"),
+}
+sim = Simulator(model_config=model_config)
+```
+If `api_key` is not included in the `model_config`, the prompty runtime in `promptflow-core` will pick up `DefaultAzureCredential`.
+
 ### Bugs Fixed
 
-- Fixed issue where Entra ID authentication was not working with `AzureOpenAIModelConfiguration`
+- Fixed issue where Entra ID authentication was not working with `AzureOpenAIModelConfiguration`
 
 ## 1.0.0b2 (2024-09-24)
 
@@ -495,9 +523,9 @@ evaluate(
 ### Breaking Changes
 
 - The `synthetic` namespace has been renamed to `simulator`, and sub-namespaces under this module have been removed
-- The `evaluate` and `evaluators` namespaces have been removed, and everything previously exposed in those modules has been added to the root namespace `azure.ai.evaluation`
+- The `evaluate` and `evaluators` namespaces have been removed, and everything previously exposed in those modules has been added to the root namespace `azure.ai.evaluation`
 - The parameter name `project_scope` in content safety evaluators has been renamed to `azure_ai_project` for consistency with the evaluate API and simulators.
-- Model configuration classes are now of type `TypedDict` and are exposed in the `azure.ai.evaluation` module instead of coming from `promptflow.core`.
+- Model configuration classes are now of type `TypedDict` and are exposed in the `azure.ai.evaluation` module instead of coming from `promptflow.core`.
 - Updated the parameter names for `question` and `answer` in built-in evaluators to more generic terms: `query` and `response`.
 
 ### Features Added
{azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/README.md
RENAMED
@@ -119,11 +119,6 @@ name: ApplicationPrompty
 description: Simulates an application
 model:
   api: chat
-  configuration:
-    type: azure_openai
-    azure_deployment: ${env:AZURE_DEPLOYMENT}
-    api_key: ${env:AZURE_OPENAI_API_KEY}
-    azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT}
   parameters:
     temperature: 0.0
     top_p: 1.0
@@ -152,52 +147,55 @@ import asyncio
 from typing import Any, Dict, List, Optional
 from azure.ai.evaluation.simulator import Simulator
 from promptflow.client import load_flow
-from azure.identity import DefaultAzureCredential
 import os
+import wikipedia
 
-
-
-"
-"
+# Set up the model configuration without api_key, using DefaultAzureCredential
+model_config = {
+    "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"),
+    "azure_deployment": os.environ.get("AZURE_DEPLOYMENT"),
+    # not providing key would make the SDK pick up `DefaultAzureCredential`
+    # use "api_key": "<your API key>"
 }
 
-
-wiki_search_term = "Leonardo da
+# Use Wikipedia to get some text for the simulation
+wiki_search_term = "Leonardo da Vinci"
 wiki_title = wikipedia.search(wiki_search_term)[0]
 wiki_page = wikipedia.page(wiki_title)
 text = wiki_page.summary[:1000]
 
-def method_to_invoke_application_prompty(query: str):
+def method_to_invoke_application_prompty(query: str, messages_list: List[Dict], context: Optional[Dict]):
     try:
         current_dir = os.path.dirname(__file__)
         prompty_path = os.path.join(current_dir, "application.prompty")
-        _flow = load_flow(
-
-
+        _flow = load_flow(
+            source=prompty_path,
+            model=model_config,
+            credential=DefaultAzureCredential()
+        )
         response = _flow(
             query=query,
             context=context,
             conversation_history=messages_list
         )
         return response
-    except:
-        print("Something went wrong invoking the prompty")
+    except Exception as e:
+        print(f"Something went wrong invoking the prompty: {e}")
         return "something went wrong"
 
 async def callback(
-    messages: List[Dict],
+    messages: Dict[str, List[Dict]],
     stream: bool = False,
     session_state: Any = None,  # noqa: ANN401
     context: Optional[Dict[str, Any]] = None,
 ) -> dict:
     messages_list = messages["messages"]
-    #
+    # Get the last message from the user
     latest_message = messages_list[-1]
     query = latest_message["content"]
-
-
-    response
-    # we are formatting the response to follow the openAI chat protocol format
+    # Call your endpoint or AI application here
+    response = method_to_invoke_application_prompty(query, messages_list, context)
+    # Format the response to follow the OpenAI chat protocol format
     formatted_response = {
         "content": response,
         "role": "assistant",
@@ -208,10 +206,8 @@ async def callback(
     messages["messages"].append(formatted_response)
     return {"messages": messages["messages"], "stream": stream, "session_state": session_state, "context": context}
 
-
-
 async def main():
-    simulator = Simulator(
+    simulator = Simulator(model_config=model_config)
     outputs = await simulator(
         target=callback,
         text=text,
@@ -222,17 +218,17 @@ async def main():
             f"I am a teacher and I want to teach my students about {wiki_search_term}"
         ],
     )
-    print(json.dumps(outputs))
+    print(json.dumps(outputs, indent=2))
 
 if __name__ == "__main__":
-
-
-
-    os.environ["
-    os.environ["
-    os.environ["AZURE_DEPLOYMENT"] = ""
+    # Ensure that the following environment variables are set in your environment:
+    # AZURE_OPENAI_ENDPOINT and AZURE_DEPLOYMENT
+    # Example:
+    # os.environ["AZURE_OPENAI_ENDPOINT"] = "https://your-endpoint.openai.azure.com/"
+    # os.environ["AZURE_DEPLOYMENT"] = "your-deployment-name"
     asyncio.run(main())
     print("done!")
+
 ```
 
 #### Adversarial Simulator
{azure_ai_evaluation-1.0.0b3 → azure_ai_evaluation-1.0.0b4}/azure/ai/evaluation/_common/constants.py
RENAMED
@@ -3,6 +3,8 @@
 # ---------------------------------------------------------
 from enum import Enum
 
+from azure.core import CaseInsensitiveEnumMeta
+
 
 class CommonConstants:
     """Define common constants."""
@@ -43,7 +45,7 @@ class _InternalAnnotationTasks:
     ECI = "eci"
 
 
-class EvaluationMetrics:
+class EvaluationMetrics(str, Enum, metaclass=CaseInsensitiveEnumMeta):
     """Evaluation metrics to aid the RAI service in determining what
     metrics to request, and how to present them back to the user."""
 
@@ -56,7 +58,7 @@ class EvaluationMetrics:
     XPIA = "xpia"
 
 
-class _InternalEvaluationMetrics:
+class _InternalEvaluationMetrics(str, Enum, metaclass=CaseInsensitiveEnumMeta):
     """Evaluation metrics that are not publicly supported.
     These metrics are experimental and subject to potential change or migration to the main
     enum over time.
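The switch to `str`-valued enums with `CaseInsensitiveEnumMeta` means metric names compare equal to plain strings and can be looked up without worrying about casing. A minimal sketch of the behavior this buys, using a stand-in enum rather than the SDK's internal one:

```python
from enum import Enum

from azure.core import CaseInsensitiveEnumMeta


class Metric(str, Enum, metaclass=CaseInsensitiveEnumMeta):
    VIOLENCE = "violence"
    SEXUAL = "sexual"


# str mixin: members compare equal to their string values
assert Metric.VIOLENCE == "violence"

# CaseInsensitiveEnumMeta: member lookup ignores casing
assert Metric["Violence"] is Metric.VIOLENCE
assert Metric.violence is Metric.VIOLENCE
```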
azure_ai_evaluation-1.0.0b4/azure/ai/evaluation/_common/math.py
ADDED
@@ -0,0 +1,18 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+import math
+from typing import List
+
+
+def list_sum(lst: List[float]) -> float:
+    return sum(lst)
+
+
+def list_mean(lst: List[float]) -> float:
+    return list_sum(lst) / len(lst)
+
+
+def list_mean_nan_safe(lst: List[float]) -> float:
+    return list_mean([l for l in lst if not math.isnan(l)])
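These helpers replace the previous `numpy` based aggregation. A quick sketch of how `list_mean_nan_safe` behaves; note this is a private module, so importing it directly is shown only for illustration:

```python
import math

from azure.ai.evaluation._common.math import list_mean_nan_safe

scores = [4.0, math.nan, 5.0]

# NaN entries are filtered out before the mean is taken: (4.0 + 5.0) / 2 == 4.5
print(list_mean_nan_safe(scores))

# Caveat: an all-NaN input leaves an empty list behind, so list_mean would
# divide by zero; callers are expected to guard against that case.
```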