azure-ai-evaluation 1.0.0b3__py3-none-any.whl → 1.0.0b5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of azure-ai-evaluation might be problematic.

Files changed (93)
  1. azure/ai/evaluation/__init__.py +23 -1
  2. azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py +20 -9
  3. azure/ai/evaluation/_common/constants.py +9 -2
  4. azure/ai/evaluation/_common/math.py +29 -0
  5. azure/ai/evaluation/_common/rai_service.py +222 -93
  6. azure/ai/evaluation/_common/utils.py +328 -19
  7. azure/ai/evaluation/_constants.py +16 -8
  8. azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/__init__.py +3 -2
  9. azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/code_client.py +33 -17
  10. azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py} +14 -7
  11. azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/proxy_client.py +22 -4
  12. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +35 -0
  13. azure/ai/evaluation/_evaluate/_eval_run.py +47 -14
  14. azure/ai/evaluation/_evaluate/_evaluate.py +370 -188
  15. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +15 -16
  16. azure/ai/evaluation/_evaluate/_utils.py +77 -25
  17. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +1 -1
  18. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +16 -10
  19. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +76 -34
  20. azure/ai/evaluation/_evaluators/_common/_base_eval.py +76 -46
  21. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +26 -19
  22. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +62 -25
  23. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +68 -36
  24. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +67 -46
  25. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +33 -4
  26. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +33 -4
  27. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +33 -4
  28. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +33 -4
  29. azure/ai/evaluation/_evaluators/_eci/_eci.py +7 -5
  30. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +14 -6
  31. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +22 -21
  32. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +66 -36
  33. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +1 -1
  34. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +51 -16
  35. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +113 -0
  36. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +99 -0
  37. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +3 -7
  38. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +20 -0
  39. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +130 -0
  40. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +57 -0
  41. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +96 -0
  42. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +120 -0
  43. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +96 -0
  44. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +96 -0
  45. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +96 -0
  46. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +46 -13
  47. azure/ai/evaluation/_evaluators/_qa/_qa.py +11 -6
  48. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +23 -20
  49. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +78 -42
  50. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +126 -80
  51. azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +74 -24
  52. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +2 -2
  53. azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
  54. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +150 -0
  55. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +32 -15
  56. azure/ai/evaluation/_evaluators/_xpia/xpia.py +36 -10
  57. azure/ai/evaluation/_exceptions.py +26 -6
  58. azure/ai/evaluation/_http_utils.py +203 -132
  59. azure/ai/evaluation/_model_configurations.py +23 -6
  60. azure/ai/evaluation/_vendor/__init__.py +3 -0
  61. azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
  62. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +328 -0
  63. azure/ai/evaluation/_vendor/rouge_score/scoring.py +63 -0
  64. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +63 -0
  65. azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
  66. azure/ai/evaluation/_version.py +1 -1
  67. azure/ai/evaluation/simulator/__init__.py +2 -1
  68. azure/ai/evaluation/simulator/_adversarial_scenario.py +5 -0
  69. azure/ai/evaluation/simulator/_adversarial_simulator.py +88 -60
  70. azure/ai/evaluation/simulator/_conversation/__init__.py +13 -12
  71. azure/ai/evaluation/simulator/_conversation/_conversation.py +4 -4
  72. azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
  73. azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
  74. azure/ai/evaluation/simulator/_direct_attack_simulator.py +24 -66
  75. azure/ai/evaluation/simulator/_helpers/__init__.py +1 -2
  76. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +26 -5
  77. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +98 -95
  78. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +67 -21
  79. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +28 -11
  80. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +68 -24
  81. azure/ai/evaluation/simulator/_model_tools/models.py +10 -10
  82. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +4 -9
  83. azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +6 -5
  84. azure/ai/evaluation/simulator/_simulator.py +222 -169
  85. azure/ai/evaluation/simulator/_tracing.py +4 -4
  86. azure/ai/evaluation/simulator/_utils.py +6 -6
  87. {azure_ai_evaluation-1.0.0b3.dist-info → azure_ai_evaluation-1.0.0b5.dist-info}/METADATA +237 -52
  88. azure_ai_evaluation-1.0.0b5.dist-info/NOTICE.txt +70 -0
  89. azure_ai_evaluation-1.0.0b5.dist-info/RECORD +120 -0
  90. {azure_ai_evaluation-1.0.0b3.dist-info → azure_ai_evaluation-1.0.0b5.dist-info}/WHEEL +1 -1
  91. azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -49
  92. azure_ai_evaluation-1.0.0b3.dist-info/RECORD +0 -98
  93. {azure_ai_evaluation-1.0.0b3.dist-info → azure_ai_evaluation-1.0.0b5.dist-info}/top_level.txt +0 -0
azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py (new file)
@@ -0,0 +1,35 @@
+ # ---------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # ---------------------------------------------------------
+ import os
+ import types
+ from typing import Optional, Type
+
+ from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP
+
+
+ class TargetRunContext:
+     """Context manager for target batch run.
+
+     :param upload_snapshot: Whether to upload target snapshot.
+     :type upload_snapshot: bool
+     """
+
+     def __init__(self, upload_snapshot: bool) -> None:
+         self._upload_snapshot = upload_snapshot
+
+     def __enter__(self) -> None:
+         # Address "[WinError 32] The process cannot access the file" error,
+         # caused by conflicts when the venv and target function are in the same directory.
+         # Setting PF_FLOW_ENTRY_IN_TMP to true uploads only the flex entry file (flow.flex.yaml).
+         if not self._upload_snapshot:
+             os.environ[PF_FLOW_ENTRY_IN_TMP] = "true"
+
+     def __exit__(
+         self,
+         exc_type: Optional[Type[BaseException]],
+         exc_value: Optional[BaseException],
+         exc_tb: Optional[types.TracebackType],
+     ) -> None:
+         if not self._upload_snapshot:
+             os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None)
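
In effect, TargetRunContext toggles promptflow's PF_FLOW_ENTRY_IN_TMP environment variable around a target batch run. A minimal usage sketch (run_target is a hypothetical stand-in for the SDK's target invocation, and the import path assumes the class is re-exported from _batch_run/__init__.py):

    from azure.ai.evaluation._evaluate._batch_run import TargetRunContext

    # With upload_snapshot=False, only the flex entry file (flow.flex.yaml)
    # is uploaded rather than a snapshot of the whole working directory.
    with TargetRunContext(upload_snapshot=False):
        run_target()  # hypothetical: kick off the target batch run here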
azure/ai/evaluation/_evaluate/_eval_run.py
@@ -10,16 +10,18 @@ import posixpath
  import time
  import types
  import uuid
- from typing import Any, Dict, Optional, Set, Type
+ from typing import Any, Dict, List, Optional, Set, Type
  from urllib.parse import urlparse

  from promptflow._sdk.entities import Run
+ from typing_extensions import Self

  from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
  from azure.ai.evaluation._http_utils import get_http_client
  from azure.ai.evaluation._version import VERSION
  from azure.core.pipeline.policies import RetryPolicy
  from azure.core.rest import HttpResponse
+ from azure.core.exceptions import HttpResponseError

  LOGGER = logging.getLogger(__name__)

@@ -27,6 +29,7 @@ LOGGER = logging.getLogger(__name__)
  # Handle optional import. The azure libraries are only present if
  # promptflow-azure is installed.
  try:
+     from azure.ai.ml import MLClient
      from azure.ai.ml.entities._credentials import AccountKeyConfiguration # pylint: disable=ungrouped-imports
      from azure.ai.ml.entities._datastore.datastore import Datastore
      from azure.storage.blob import BlobServiceClient
@@ -121,8 +124,8 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
          self._run_name = run_name
          self._promptflow_run = promptflow_run
          self._status = RunStatus.NOT_STARTED
-         self._url_base = None
-         self.info = None
+         self._url_base: Optional[str] = None
+         self._info: Optional[RunInfo] = None

      @property
      def status(self) -> RunStatus:
@@ -134,6 +137,20 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
          """
          return self._status

+     @property
+     def info(self) -> RunInfo:
+         if self._info is None:
+             msg = "Run info is missing"
+             raise EvaluationException(
+                 message=msg,
+                 internal_message=msg,
+                 target=ErrorTarget.EVAL_RUN,
+                 category=ErrorCategory.UNKNOWN,
+                 blame=ErrorBlame.UNKNOWN,
+             )
+
+         return self._info
+
      def _get_scope(self) -> str:
          """
          Return the scope information for the workspace.
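
The hunks above replace the public, mutable self.info attribute with a read-only property backed by _info that fails fast when accessed before the run has started. The same guarded-property pattern in isolation (a simplified sketch, not the SDK's actual class):

    from typing import Optional

    class Run:
        def __init__(self) -> None:
            self._info: Optional[dict] = None  # populated once the run starts

        @property
        def info(self) -> dict:
            # Raise immediately instead of handing callers a None.
            if self._info is None:
                raise RuntimeError("Run info is missing; start the run first.")
            return self._info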
@@ -161,11 +178,11 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
              )
              self._url_base = None
              self._status = RunStatus.BROKEN
-             self.info = RunInfo.generate(self._run_name)
+             self._info = RunInfo.generate(self._run_name)
          else:
              self._url_base = urlparse(self._tracking_uri).netloc
              if self._promptflow_run is not None:
-                 self.info = RunInfo(
+                 self._info = RunInfo(
                      self._promptflow_run.name,
                      self._promptflow_run._experiment_name, # pylint: disable=protected-access
                      self._promptflow_run.name,
@@ -182,7 +199,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
              body["run_name"] = self._run_name
          response = self.request_with_retry(url=url, method="POST", json_dict=body)
          if response.status_code != 200:
-             self.info = RunInfo.generate(self._run_name)
+             self._info = RunInfo.generate(self._run_name)
              LOGGER.warning(
                  "The run failed to start: %s: %s."
                  "The results will be saved locally, but will not be logged to Azure.",
@@ -192,7 +209,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
              self._status = RunStatus.BROKEN
          else:
              parsed_response = response.json()
-             self.info = RunInfo(
+             self._info = RunInfo(
                  run_id=parsed_response["run"]["info"]["run_id"],
                  experiment_id=parsed_response["run"]["info"]["experiment_id"],
                  run_name=parsed_response["run"]["info"]["run_name"],
@@ -235,7 +252,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
              LOGGER.warning("Unable to terminate the run.")
          self._status = RunStatus.TERMINATED

-     def __enter__(self):
+     def __enter__(self) -> Self:
          """The Context Manager enter call.

          :return: The instance of the class.
@@ -249,7 +266,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
          exc_type: Optional[Type[BaseException]],
          exc_value: Optional[BaseException],
          exc_tb: Optional[types.TracebackType],
-     ) -> Optional[bool]:
+     ) -> None:
          """The context manager exit call.

          :param exc_type: The exception type
@@ -408,7 +425,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
              return
          # First we will list the files and the appropriate remote paths for them.
          root_upload_path = posixpath.join("promptflow", "PromptFlowArtifacts", self.info.run_name)
-         remote_paths = {"paths": []}
+         remote_paths: Dict[str, List[Dict[str, str]]] = {"paths": []}
          local_paths = []
          # Go over the artifact folder and upload all artifacts.
          for root, _, filenames in os.walk(artifact_folder):
@@ -427,10 +444,26 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
          datastore = self._ml_client.datastores.get_default(include_secrets=True)
          account_url = f"{datastore.account_name}.blob.{datastore.endpoint}"
          svc_client = BlobServiceClient(account_url=account_url, credential=self._get_datastore_credential(datastore))
-         for local, remote in zip(local_paths, remote_paths["paths"]):
-             blob_client = svc_client.get_blob_client(container=datastore.container_name, blob=remote["path"])
-             with open(local, "rb") as fp:
-                 blob_client.upload_blob(fp, overwrite=True)
+         try:
+             for local, remote in zip(local_paths, remote_paths["paths"]):
+                 blob_client = svc_client.get_blob_client(container=datastore.container_name, blob=remote["path"])
+                 with open(local, "rb") as fp:
+                     blob_client.upload_blob(fp, overwrite=True)
+         except HttpResponseError as ex:
+             if ex.status_code == 403:
+                 msg = (
+                     "Failed to upload evaluation run to the cloud due to insufficient permission to access the storage."
+                     " Please ensure that the necessary access rights are granted."
+                 )
+                 raise EvaluationException(
+                     message=msg,
+                     target=ErrorTarget.EVAL_RUN,
+                     category=ErrorCategory.FAILED_REMOTE_TRACKING,
+                     blame=ErrorBlame.USER_ERROR,
+                     tsg_link="https://aka.ms/azsdk/python/evaluation/remotetracking/troubleshoot",
+                 ) from ex
+
+             raise ex

          # To show artifact in UI we will need to register it. If it is a promptflow run,
          # we are rewriting already registered artifact and need to skip this step.
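
The final hunk wraps the blob uploads so that a 403 from storage surfaces as an actionable user error instead of a raw azure-core exception. The shape of the pattern, reduced to essentials (upload_files and the PermissionError are illustrative stand-ins, not the SDK's helpers):

    from typing import List, Tuple
    from azure.core.exceptions import HttpResponseError
    from azure.storage.blob import BlobClient

    def upload_files(pairs: List[Tuple[str, BlobClient]]) -> None:
        try:
            for local_path, blob_client in pairs:
                with open(local_path, "rb") as fp:
                    blob_client.upload_blob(fp, overwrite=True)
        except HttpResponseError as ex:
            if ex.status_code == 403:
                # Translate the storage 403 into a permissions hint.
                raise PermissionError(
                    "Insufficient permission to upload evaluation artifacts; "
                    "ensure the identity can write to the workspace's default datastore."
                ) from ex
            raise  # re-raise anything that is not a permissions failure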