azure-ai-evaluation 1.5.0__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff shows the content of publicly available package versions as released to their public registries. It is provided for informational purposes only and reflects the changes between the two versions as published.

Potentially problematic release: this version of azure-ai-evaluation was flagged as possibly problematic.
Files changed (123)
  1. azure/ai/evaluation/__init__.py +9 -0
  2. azure/ai/evaluation/_aoai/__init__.py +10 -0
  3. azure/ai/evaluation/_aoai/aoai_grader.py +89 -0
  4. azure/ai/evaluation/_aoai/label_grader.py +66 -0
  5. azure/ai/evaluation/_aoai/string_check_grader.py +65 -0
  6. azure/ai/evaluation/_aoai/text_similarity_grader.py +88 -0
  7. azure/ai/evaluation/_azure/_clients.py +4 -4
  8. azure/ai/evaluation/_azure/_envs.py +208 -0
  9. azure/ai/evaluation/_azure/_token_manager.py +12 -7
  10. azure/ai/evaluation/_common/__init__.py +5 -0
  11. azure/ai/evaluation/_common/evaluation_onedp_client.py +118 -0
  12. azure/ai/evaluation/_common/onedp/__init__.py +32 -0
  13. azure/ai/evaluation/_common/onedp/_client.py +139 -0
  14. azure/ai/evaluation/_common/onedp/_configuration.py +73 -0
  15. azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
  16. azure/ai/evaluation/_common/onedp/_patch.py +21 -0
  17. azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
  18. azure/ai/evaluation/_common/onedp/_types.py +21 -0
  19. azure/ai/evaluation/_common/onedp/_validation.py +50 -0
  20. azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
  21. azure/ai/evaluation/_common/onedp/_version.py +9 -0
  22. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
  23. azure/ai/evaluation/_common/onedp/aio/_client.py +143 -0
  24. azure/ai/evaluation/_common/onedp/aio/_configuration.py +75 -0
  25. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
  26. azure/ai/evaluation/_common/onedp/aio/_vendor.py +40 -0
  27. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +39 -0
  28. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +4494 -0
  29. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
  30. azure/ai/evaluation/_common/onedp/models/__init__.py +142 -0
  31. azure/ai/evaluation/_common/onedp/models/_enums.py +162 -0
  32. azure/ai/evaluation/_common/onedp/models/_models.py +2228 -0
  33. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
  34. azure/ai/evaluation/_common/onedp/operations/__init__.py +39 -0
  35. azure/ai/evaluation/_common/onedp/operations/_operations.py +5655 -0
  36. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
  37. azure/ai/evaluation/_common/onedp/py.typed +1 -0
  38. azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
  39. azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
  40. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
  41. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
  42. azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
  43. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
  44. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
  45. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
  46. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
  47. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
  48. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
  49. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
  50. azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
  51. azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
  52. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
  53. azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
  54. azure/ai/evaluation/_common/rai_service.py +158 -28
  55. azure/ai/evaluation/_common/raiclient/_version.py +1 -1
  56. azure/ai/evaluation/_common/utils.py +79 -1
  57. azure/ai/evaluation/_constants.py +16 -0
  58. azure/ai/evaluation/_eval_mapping.py +71 -0
  59. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +30 -16
  60. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +8 -0
  61. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +5 -0
  62. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +17 -1
  63. azure/ai/evaluation/_evaluate/_eval_run.py +1 -1
  64. azure/ai/evaluation/_evaluate/_evaluate.py +325 -74
  65. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +534 -0
  66. azure/ai/evaluation/_evaluate/_utils.py +117 -4
  67. azure/ai/evaluation/_evaluators/_common/_base_eval.py +8 -3
  68. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +12 -3
  69. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +2 -2
  70. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +11 -0
  71. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +467 -0
  72. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +1 -1
  73. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +1 -1
  74. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +6 -2
  75. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +1 -1
  76. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +7 -2
  77. azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +31 -46
  78. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +1 -1
  79. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +5 -2
  80. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +6 -2
  81. azure/ai/evaluation/_exceptions.py +2 -0
  82. azure/ai/evaluation/_legacy/_adapters/__init__.py +0 -14
  83. azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
  84. azure/ai/evaluation/_legacy/_adapters/_flows.py +1 -1
  85. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +51 -32
  86. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +114 -8
  87. azure/ai/evaluation/_legacy/_batch_engine/_result.py +6 -0
  88. azure/ai/evaluation/_legacy/_batch_engine/_run.py +6 -0
  89. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +69 -29
  90. azure/ai/evaluation/_legacy/_batch_engine/_trace.py +54 -62
  91. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +19 -1
  92. azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
  93. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +124 -0
  94. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +15 -0
  95. azure/ai/evaluation/_legacy/prompty/_connection.py +11 -74
  96. azure/ai/evaluation/_legacy/prompty/_exceptions.py +80 -0
  97. azure/ai/evaluation/_legacy/prompty/_prompty.py +119 -9
  98. azure/ai/evaluation/_legacy/prompty/_utils.py +72 -2
  99. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +90 -17
  100. azure/ai/evaluation/_version.py +1 -1
  101. azure/ai/evaluation/red_team/_attack_strategy.py +1 -1
  102. azure/ai/evaluation/red_team/_red_team.py +825 -450
  103. azure/ai/evaluation/red_team/_utils/metric_mapping.py +23 -0
  104. azure/ai/evaluation/red_team/_utils/strategy_utils.py +1 -1
  105. azure/ai/evaluation/simulator/_adversarial_simulator.py +63 -39
  106. azure/ai/evaluation/simulator/_constants.py +1 -0
  107. azure/ai/evaluation/simulator/_conversation/__init__.py +13 -6
  108. azure/ai/evaluation/simulator/_conversation/_conversation.py +2 -1
  109. azure/ai/evaluation/simulator/_direct_attack_simulator.py +35 -22
  110. azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
  111. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +40 -25
  112. azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
  113. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +24 -18
  114. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +5 -10
  115. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +65 -41
  116. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +9 -5
  117. azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
  118. {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/METADATA +25 -2
  119. {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/RECORD +123 -65
  120. /azure/ai/evaluation/_legacy/{_batch_engine → _common}/_logging.py +0 -0
  121. {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/NOTICE.txt +0 -0
  122. {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/WHEEL +0 -0
  123. {azure_ai_evaluation-1.5.0.dist-info → azure_ai_evaluation-1.6.0.dist-info}/top_level.txt +0 -0
azure/ai/evaluation/_evaluate/_utils.py

@@ -10,6 +10,7 @@ from pathlib import Path
 from typing import Any, Dict, NamedTuple, Optional, Union, cast
 import uuid
 import base64
+import math
 
 import pandas as pd
 from azure.ai.evaluation._legacy._adapters.entities import Run
@@ -126,6 +127,82 @@ def process_message_content(content, images_folder_path):
             f.write(image_data_binary)
     return None
 
+def _log_metrics_and_instance_results_onedp(
+    metrics: Dict[str, Any],
+    instance_results: pd.DataFrame,
+    project_url: str,
+    evaluation_name: Optional[str],
+    name_map: Dict[str, str],
+    **kwargs,
+) -> Optional[str]:
+
+    # One RP Client
+    from azure.ai.evaluation._azure._token_manager import AzureMLTokenManager
+    from azure.ai.evaluation._constants import TokenScope
+    from azure.ai.evaluation._common import EvaluationServiceOneDPClient, EvaluationUpload
+
+    credentials = AzureMLTokenManager(
+        TokenScope.COGNITIVE_SERVICES_MANAGEMENT.value, LOGGER, credential=kwargs.get("credential")
+    )
+    client = EvaluationServiceOneDPClient(
+        endpoint=project_url,
+        credential=credentials
+    )
+
+    # Massaging before artifacts are put on disk
+    # Adding line_number as index column this is needed by UI to form link to individual instance run
+    instance_results["line_number"] = instance_results.index.values
+
+    artifact_name = "instance_results.jsonl"
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # storing multi_modal images if exists
+        col_name = "inputs.conversation"
+        if col_name in instance_results.columns:
+            for item in instance_results[col_name].items():
+                value = item[1]
+                if "messages" in value:
+                    _store_multimodal_content(value["messages"], tmpdir)
+
+        # storing artifact result
+        tmp_path = os.path.join(tmpdir, artifact_name)
+
+        with open(tmp_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
+            f.write(instance_results.to_json(orient="records", lines=True))
+
+        properties = {
+            EvaluationRunProperties.RUN_TYPE: "eval_run",
+            EvaluationRunProperties.EVALUATION_RUN: "promptflow.BatchRun",
+            EvaluationRunProperties.EVALUATION_SDK: f"azure-ai-evaluation:{VERSION}",
+            "_azureml.evaluate_artifacts": json.dumps([{"path": artifact_name, "type": "table"}]),
+        }
+        properties.update(_convert_name_map_into_property_entries(name_map))
+
+        create_evaluation_result_response = client.create_evaluation_result(
+            name=uuid.uuid4(),
+            path=tmpdir,
+            metrics=metrics
+        )
+
+        upload_run_response = client.start_evaluation_run(
+            evaluation=EvaluationUpload(
+                display_name=evaluation_name,
+            )
+        )
+
+        update_run_response = client.update_evaluation_run(
+            name=upload_run_response.id,
+            evaluation=EvaluationUpload(
+                display_name=evaluation_name,
+                status="Completed",
+                outputs={
+                    'evaluationResultId': create_evaluation_result_response.id,
+                },
+                properties=properties,
+            )
+        )
+
+        return update_run_response.properties.get("AiStudioEvaluationUri")
 
 def _log_metrics_and_instance_results(
     metrics: Dict[str, Any],
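For orientation, here is a minimal sketch of how this new OneDP logging path could be driven. The module path is inferred from the files-changed list (item 66, _evaluate/_utils.py); the project URL shape, the DataFrame contents, and the use of DefaultAzureCredential are illustrative assumptions, not taken from this diff.

    import pandas as pd
    from azure.identity import DefaultAzureCredential
    from azure.ai.evaluation._evaluate._utils import _log_metrics_and_instance_results_onedp

    # Illustrative inputs; the real caller lives in _evaluate.py (+325 -74 above).
    instance_results = pd.DataFrame(
        [{"inputs.query": "What is AKS?", "outputs.fluency.fluency": 4}]
    )

    studio_url = _log_metrics_and_instance_results_onedp(
        metrics={"fluency.fluency": 4.0},
        instance_results=instance_results,
        project_url="https://<resource>.services.ai.azure.com/api/projects/<project>",  # assumed endpoint shape
        evaluation_name="demo-eval",
        name_map={"fluency": "FluencyEvaluator"},
        credential=DefaultAzureCredential(),
    )
    print(studio_url)  # AI Studio evaluation URI, or None if the service returns no link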
@@ -133,6 +210,7 @@ def _log_metrics_and_instance_results(
     trace_destination: Optional[str],
     run: Optional[Run],
     evaluation_name: Optional[str],
+    name_map: Dict[str, str],
     **kwargs,
 ) -> Optional[str]:
     from azure.ai.evaluation._evaluate._eval_run import EvalRun
@@ -187,14 +265,14 @@
         # adding these properties to avoid showing traces if a dummy run is created.
         # We are doing that only for the pure evaluation runs.
         if run is None:
-            ev_run.write_properties_to_run_history(
-                properties={
+            properties = {
                 EvaluationRunProperties.RUN_TYPE: "eval_run",
                 EvaluationRunProperties.EVALUATION_RUN: "promptflow.BatchRun",
                 EvaluationRunProperties.EVALUATION_SDK: f"azure-ai-evaluation:{VERSION}",
                 "_azureml.evaluate_artifacts": json.dumps([{"path": artifact_name, "type": "table"}]),
             }
-            )
+            properties.update(_convert_name_map_into_property_entries(name_map))
+            ev_run.write_properties_to_run_history(properties=properties)
         else:
             ev_run.write_properties_to_run_history(
                 properties={
@@ -241,7 +319,7 @@ def _write_output(path: Union[str, os.PathLike], data_dict: Any) -> None:
         p = p / DEFAULT_EVALUATION_RESULTS_FILE_NAME
 
     with open(p, "w", encoding=DefaultOpenEncoding.WRITE) as f:
-        json.dump(data_dict, f)
+        json.dump(data_dict, f, ensure_ascii=False)
 
     print(f'Evaluation results saved to "{p.resolve()}".\n')
 
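The ensure_ascii=False change means non-ASCII characters in saved evaluation results are written verbatim instead of as \uXXXX escapes. A quick illustration:

    import json

    record = {"response": "café ☕"}
    print(json.dumps(record))                      # {"response": "caf\u00e9 \u2615"}
    print(json.dumps(record, ensure_ascii=False))  # {"response": "café ☕"}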
@@ -329,6 +407,41 @@ def set_event_loop_policy() -> None:
         # On Windows seems to be a problem with EventLoopPolicy, use this snippet to work around it
         asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())  # type: ignore[attr-defined]
 
+# textwrap.wrap tries to do fancy nonsense that we don't want
+def _wrap(s, w):
+    return [s[i:i + w] for i in range(0, len(s), w)]
+
+def _convert_name_map_into_property_entries(
+    name_map: Dict[str, str], segment_length: int = 950, max_segments: int = 10
+) -> Dict[str, Any]:
+    """
+    Convert the name map into property entries.
+
+    :param name_map: The name map to be converted.
+    :type name_map: Dict[str, str]
+    :param segment_length: The max length of each individual segment,
+        which will each have their own dictionary entry
+    :type segment_length: str
+    :param max_segments: The max number of segments we can have. If the stringified
+        name map is too long, we just return a length entry with a value
+        of -1 to indicate that the map was too long.
+    :type max_segments: str
+    :return: The converted name map.
+    :rtype: Dict[str, Any]
+    """
+    name_map_string = json.dumps(name_map)
+    num_segments = math.ceil(len(name_map_string) / segment_length)
+    # Property map is somehow still too long to encode within the space
+    # we allow, so give up, but make sure the service knows we gave up
+    if (num_segments > max_segments):
+        return {EvaluationRunProperties.NAME_MAP_LENGTH: -1}
+
+    result: Dict[str, Any] = {EvaluationRunProperties.NAME_MAP_LENGTH: num_segments}
+    segments_list = _wrap(name_map_string, segment_length)
+    for i in range(0, num_segments):
+        segment_key = f"{EvaluationRunProperties.NAME_MAP}_{i}"
+        result[segment_key] = segments_list[i]
+    return result
 
 class JSONLDataFileLoader:
     def __init__(self, filename: Union[os.PathLike, str]):
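The segmentation logic is easy to verify in isolation. This standalone sketch (without the EvaluationRunProperties constants) shows that the fixed-width chunks concatenate back to the original JSON, which is why plain slicing is used instead of textwrap.wrap:

    import json
    import math

    def _wrap(s, w):
        # Fixed-width chunking; textwrap.wrap would merge and strip whitespace.
        return [s[i:i + w] for i in range(0, len(s), w)]

    name_map = {f"evaluator_{i}": f"display_name_{i}" for i in range(100)}
    name_map_string = json.dumps(name_map)

    segment_length = 950
    segments = _wrap(name_map_string, segment_length)
    assert len(segments) == math.ceil(len(name_map_string) / segment_length)
    assert all(len(s) <= segment_length for s in segments)

    # A reader of the run-history properties can reassemble the map by concatenation.
    assert json.loads("".join(segments)) == name_map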
azure/ai/evaluation/_evaluators/_common/_base_eval.py

@@ -495,7 +495,8 @@ class AsyncEvaluatorBase:
     # are known to throw at this, mash them into kwargs, and then pass them into the real call.
     async def __call__(
         self, *, query=None, response=None, context=None, conversation=None, ground_truth=None,
-        tool_call=None, tool_definitions=None, messages=None, **kwargs
+        tool_calls=None, tool_definitions=None, messages=None, retrieval_ground_truth=None,
+        retrieved_documents=None, **kwargs
     ):
         if conversation is not None:
             kwargs["conversation"] = conversation
@@ -509,11 +510,15 @@ class AsyncEvaluatorBase:
             kwargs["context"] = context
         if ground_truth is not None:
             kwargs["ground_truth"] = ground_truth
-        if tool_call is not None:
-            kwargs["tool_call"] = tool_call
+        if tool_calls is not None:
+            kwargs["tool_calls"] = tool_calls
         if tool_definitions is not None:
             kwargs["tool_definitions"] = tool_definitions
         if messages is not None:
             kwargs["messages"] = messages
+        if retrieval_ground_truth is not None:
+            kwargs["retrieval_ground_truth"] = retrieval_ground_truth
+        if retrieved_documents is not None:
+            kwargs["retrieved_documents"] = retrieved_documents
 
         return await self._real_call(**kwargs)
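From a caller's perspective, the visible change is the tool_call to tool_calls rename plus the two new retrieval keywords. A hedged sketch against a tool-oriented evaluator; its exact public signature is not part of this excerpt, and the payload shapes are illustrative:

    from azure.ai.evaluation import ToolCallAccuracyEvaluator

    # model_config shape is illustrative.
    model_config = {"azure_endpoint": "<endpoint>", "azure_deployment": "<deployment>", "api_key": "<key>"}

    evaluator = ToolCallAccuracyEvaluator(model_config=model_config)
    result = evaluator(
        query="What's the weather in Seattle?",
        tool_calls=[{"type": "tool_call", "name": "get_weather", "arguments": {"city": "Seattle"}}],  # renamed kwarg
        tool_definitions=[{"name": "get_weather", "description": "Look up current weather", "parameters": {}}],
    )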
azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py

@@ -4,9 +4,13 @@
 
 import math
 import re
+import os
 from typing import Dict, TypeVar, Union
 
-from azure.ai.evaluation._legacy.prompty import AsyncPrompty
+if os.getenv("AI_EVALS_USE_PF_PROMPTY", "false").lower() == "true":
+    from promptflow.core._flow import AsyncPrompty
+else:
+    from azure.ai.evaluation._legacy.prompty import AsyncPrompty
 from typing_extensions import override
 
 from azure.ai.evaluation._common.constants import PROMPT_BASED_REASON_EVALUATORS
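Because this branch runs at import time, the AI_EVALS_USE_PF_PROMPTY toggle must be set before the evaluation modules are first imported. A sketch:

    import os

    # Opt back into promptflow's AsyncPrompty; must happen before the first
    # import of azure.ai.evaluation, since the import branch above runs at module load.
    os.environ["AI_EVALS_USE_PF_PROMPTY"] = "true"

    from azure.ai.evaluation import FluencyEvaluator  # now backed by promptflow.core._flow.AsyncPrompty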
@@ -39,13 +43,17 @@ class PromptyEvaluatorBase(EvaluatorBase[T]):
     :param ignore_queries: If True, queries will be ignored in conversation evaluations. Default is False.
         Useful since some evaluators of this format are response-only.
     :type ignore_queries: bool
+    :keyword is_reasoning_model: This parameter is in preview. If True, updates the config parameters in prompty file based on reasoning models. Defaults to False.
+    :type is_reasoning_model: bool
     """
 
     _LLM_CALL_TIMEOUT = 600
     _DEFAULT_OPEN_API_VERSION = "2024-02-15-preview"
 
-    def __init__(self, *, result_key: str, prompty_file: str, model_config: dict, eval_last_turn: bool = False, threshold: int = 3, _higher_is_better: bool = False):
+    def __init__(self, *, result_key: str, prompty_file: str, model_config: dict, eval_last_turn: bool = False,
+                 threshold: int = 3, _higher_is_better: bool = False, **kwargs) -> None:
         self._result_key = result_key
+        self._is_reasoning_model = kwargs.get("is_reasoning_model", False)
         self._prompty_file = prompty_file
         self._threshold = threshold
         self._higher_is_better = _higher_is_better
@@ -59,7 +67,8 @@ class PromptyEvaluatorBase(EvaluatorBase[T]):
             user_agent,
         )
 
-        self._flow = AsyncPrompty.load(source=prompty_file, model=prompty_model_config)
+        self._flow = AsyncPrompty.load(source=self._prompty_file, model=prompty_model_config,
+                                       is_reasoning_model=self._is_reasoning_model)
 
     # __call__ not overridden here because child classes have such varied signatures that there's no point
     # defining a default here.
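A hedged usage sketch for the preview flag, assuming the concrete prompty-based evaluators (whose diffs are listed above but not shown here) forward **kwargs to this base class:

    from azure.ai.evaluation import GroundednessEvaluator

    model_config = {
        "azure_endpoint": "https://<resource>.openai.azure.com",  # illustrative
        "azure_deployment": "o3-mini",                            # a reasoning-model deployment
        "api_key": "<key>",
    }

    # is_reasoning_model is in preview; it adjusts the prompty config for
    # reasoning models. Assumes the subclass forwards **kwargs to the base.
    evaluator = GroundednessEvaluator(model_config=model_config, is_reasoning_model=True)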
azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py

@@ -12,7 +12,7 @@ from azure.ai.evaluation._common.constants import (
     _InternalAnnotationTasks,
 )
 from azure.ai.evaluation._common.rai_service import evaluate_with_rai_service, evaluate_with_rai_service_multimodal
-from azure.ai.evaluation._common.utils import validate_azure_ai_project
+from azure.ai.evaluation._common.utils import validate_azure_ai_project, is_onedp_project
 from azure.ai.evaluation._exceptions import EvaluationException
 from azure.ai.evaluation._common.utils import validate_conversation
 from azure.ai.evaluation._constants import _AggregationType
@@ -50,7 +50,7 @@ class RaiServiceEvaluatorBase(EvaluatorBase[T]):
     def __init__(
         self,
         eval_metric: Union[EvaluationMetrics, _InternalEvaluationMetrics],
-        azure_ai_project: dict,
+        azure_ai_project: Union[dict, str],
         credential: TokenCredential,
         eval_last_turn: bool = False,
         conversation_aggregation_type: _AggregationType = _AggregationType.MEAN,
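The widened azure_ai_project: Union[dict, str] reflects the new OneDP path: safety evaluators now accept a project endpoint URL string alongside the classic scope dictionary. A sketch with placeholder values; the URL shape is an assumption based on the is_onedp_project helper:

    from azure.identity import DefaultAzureCredential
    from azure.ai.evaluation import ViolenceEvaluator

    credential = DefaultAzureCredential()

    # Classic hub-based project scope (dict form).
    evaluator = ViolenceEvaluator(
        credential=credential,
        azure_ai_project={
            "subscription_id": "<subscription-id>",
            "resource_group_name": "<resource-group>",
            "project_name": "<project-name>",
        },
    )

    # New OneDP form: a project endpoint URL string.
    evaluator = ViolenceEvaluator(
        credential=credential,
        azure_ai_project="https://<resource>.services.ai.azure.com/api/projects/<project>",
    )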
azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py (new file)

@@ -0,0 +1,11 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+from ._document_retrieval import DocumentRetrievalEvaluator, RetrievalGroundTruthDocument, RetrievedDocument
+
+__all__ = [
+    "DocumentRetrievalEvaluator",
+    "RetrievalGroundTruthDocument",
+    "RetrievedDocument"
+]
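Finally, a hedged usage sketch for the new evaluator. The two keyword names match the AsyncEvaluatorBase plumbing above; the per-document field names and the package-level export are assumptions, since _document_retrieval.py (+467 lines) and the top-level __init__.py diff are not shown in this excerpt:

    from azure.ai.evaluation import DocumentRetrievalEvaluator

    # Field names below are assumptions about RetrievalGroundTruthDocument / RetrievedDocument.
    retrieval_ground_truth = [
        {"document_id": "doc-1", "query_relevance_label": 3},
        {"document_id": "doc-2", "query_relevance_label": 0},
    ]
    retrieved_documents = [
        {"document_id": "doc-1", "relevance_score": 12.5},
        {"document_id": "doc-2", "relevance_score": 3.1},
    ]

    evaluator = DocumentRetrievalEvaluator()
    result = evaluator(
        retrieval_ground_truth=retrieval_ground_truth,
        retrieved_documents=retrieved_documents,
    )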