evalscope 0.5.5rc1__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (48)
  1. evalscope/backend/__init__.py +0 -3
  2. evalscope/backend/opencompass/tasks/eval_datasets.py +1 -1
  3. evalscope/backend/rag_eval/__init__.py +4 -0
  4. evalscope/backend/rag_eval/backend_manager.py +80 -0
  5. evalscope/backend/rag_eval/clip_benchmark/__init__.py +2 -0
  6. evalscope/backend/rag_eval/clip_benchmark/arguments.py +34 -0
  7. evalscope/backend/rag_eval/clip_benchmark/dataset_builder.py +277 -0
  8. evalscope/backend/rag_eval/clip_benchmark/task_template.py +119 -0
  9. evalscope/backend/rag_eval/clip_benchmark/tasks/__init__.py +0 -0
  10. evalscope/backend/rag_eval/clip_benchmark/tasks/image_caption.py +83 -0
  11. evalscope/backend/rag_eval/clip_benchmark/tasks/zeroshot_classification.py +247 -0
  12. evalscope/backend/rag_eval/clip_benchmark/tasks/zeroshot_retrieval.py +170 -0
  13. evalscope/backend/rag_eval/cmteb/__init__.py +4 -0
  14. evalscope/backend/rag_eval/cmteb/arguments.py +61 -0
  15. evalscope/backend/rag_eval/cmteb/base.py +91 -0
  16. evalscope/backend/rag_eval/cmteb/task_template.py +85 -0
  17. evalscope/backend/rag_eval/cmteb/tasks/Classification.py +302 -0
  18. evalscope/backend/rag_eval/cmteb/tasks/Clustering.py +252 -0
  19. evalscope/backend/rag_eval/cmteb/tasks/CustomTask.py +61 -0
  20. evalscope/backend/rag_eval/cmteb/tasks/PairClassification.py +113 -0
  21. evalscope/backend/rag_eval/cmteb/tasks/Reranking.py +151 -0
  22. evalscope/backend/rag_eval/cmteb/tasks/Retrieval.py +345 -0
  23. evalscope/backend/rag_eval/cmteb/tasks/STS.py +302 -0
  24. evalscope/backend/rag_eval/cmteb/tasks/__init__.py +70 -0
  25. evalscope/backend/rag_eval/ragas/__init__.py +2 -0
  26. evalscope/backend/rag_eval/ragas/arguments.py +47 -0
  27. evalscope/backend/rag_eval/ragas/metrics/__init__.py +2 -0
  28. evalscope/backend/rag_eval/ragas/metrics/multi_modal_faithfulness.py +91 -0
  29. evalscope/backend/rag_eval/ragas/metrics/multi_modal_relevance.py +99 -0
  30. evalscope/backend/rag_eval/ragas/task_template.py +61 -0
  31. evalscope/backend/rag_eval/ragas/tasks/__init__.py +2 -0
  32. evalscope/backend/rag_eval/ragas/tasks/testset_generation.py +263 -0
  33. evalscope/backend/rag_eval/ragas/tasks/translate_prompt.py +72 -0
  34. evalscope/backend/vlm_eval_kit/backend_manager.py +0 -1
  35. evalscope/backend/vlm_eval_kit/custom_dataset.py +1 -1
  36. evalscope/evaluator/evaluator.py +1 -0
  37. evalscope/models/api/openai_api.py +2 -2
  38. evalscope/perf/http_client.py +1 -1
  39. evalscope/perf/openai_api.py +2 -0
  40. evalscope/run.py +4 -0
  41. evalscope/utils/logger.py +44 -14
  42. evalscope/utils/task_utils.py +3 -0
  43. evalscope/version.py +2 -2
  44. {evalscope-0.5.5rc1.dist-info → evalscope-0.6.0.dist-info}/METADATA +95 -99
  45. {evalscope-0.5.5rc1.dist-info → evalscope-0.6.0.dist-info}/RECORD +48 -17
  46. {evalscope-0.5.5rc1.dist-info → evalscope-0.6.0.dist-info}/WHEEL +1 -1
  47. {evalscope-0.5.5rc1.dist-info → evalscope-0.6.0.dist-info}/entry_points.txt +0 -0
  48. {evalscope-0.5.5rc1.dist-info → evalscope-0.6.0.dist-info}/top_level.txt +0 -0
evalscope/backend/rag_eval/ragas/tasks/testset_generation.py ADDED
@@ -0,0 +1,263 @@
+ import os
+ import asyncio
+ import pandas as pd
+ from tqdm import tqdm
+ from ragas.llms import LangchainLLMWrapper
+ from ragas.embeddings import LangchainEmbeddingsWrapper
+ from .translate_prompt import translate_prompts
+ from evalscope.utils.logger import get_logger
+ from evalscope.backend.rag_eval.ragas.arguments import TestsetGenerationArguments
+ from evalscope.backend.rag_eval import EmbeddingModel, LLM, ChatOpenAI
+
+ os.environ['DO_NOT_TRACK'] = 'true'
+
+ logger = get_logger()
+
+
+ def get_transform(llm, embedding, language):
+     """
+     Creates and returns a default set of transforms for processing a knowledge graph.
+
+     This function defines a series of transformation steps to be applied to a
+     knowledge graph, including extracting summaries, keyphrases, titles,
+     headlines, and embeddings, as well as building similarity relationships
+     between nodes.
+
+     The transforms are applied in the following order:
+     1. Parallel extraction of summaries and headlines
+     2. Embedding of summaries for document nodes
+     3. Splitting of headlines
+     4. Parallel extraction of embeddings, keyphrases, and titles
+     5. Building cosine similarity relationships between nodes
+     6. Building cosine similarity relationships between summaries
+
+     Returns
+     -------
+     Transforms
+         A list of transformation steps to be applied to the knowledge graph.
+
+     """
+     from ragas.testset.transforms.engine import Parallel
+     from ragas.testset.transforms.extractors import (
+         EmbeddingExtractor,
+         HeadlinesExtractor,
+         KeyphrasesExtractor,
+         SummaryExtractor,
+         TitleExtractor,
+     )
+     from ragas.testset.transforms.relationship_builders.cosine import (
+         CosineSimilarityBuilder,
+         SummaryCosineSimilarityBuilder,
+     )
+     from ragas.testset.transforms.splitters import HeadlineSplitter
+     from ragas.testset.graph import NodeType
+
+     # define the transforms
+     summary_extractor = SummaryExtractor(llm=llm)
+     keyphrase_extractor = KeyphrasesExtractor(llm=llm)
+     title_extractor = TitleExtractor(llm=llm)
+     headline_extractor = HeadlinesExtractor(llm=llm)
+
+     asyncio.run(
+         translate_prompts(
+             prompts=[
+                 summary_extractor,
+                 keyphrase_extractor,
+                 title_extractor,
+                 headline_extractor,
+             ],
+             target_lang=language,
+             llm=llm,
+             adapt_instruction=True,
+         )
+     )
+
+     embedding_extractor = EmbeddingExtractor(embedding_model=embedding)
+     headline_splitter = HeadlineSplitter()
+     cosine_sim_builder = CosineSimilarityBuilder(threshold=0.8)
+     summary_embedder = EmbeddingExtractor(
+         name='summary_embedder',
+         filter_nodes=lambda node: True if node.type == NodeType.DOCUMENT else False,
+         property_name='summary_embedding',
+         embed_property_name='summary',
+         embedding_model=embedding,
+     )
+     summary_cosine_sim_builder = SummaryCosineSimilarityBuilder(threshold=0.6)
+
+     # specify the transforms and their order to be applied
+     transforms = [
+         Parallel(summary_extractor, headline_extractor),
+         summary_embedder,
+         headline_splitter,
+         Parallel(embedding_extractor, keyphrase_extractor, title_extractor),
+         cosine_sim_builder,
+         summary_cosine_sim_builder,
+     ]
+     return transforms
+
+
+ def get_distribution(llm, distribution, language):
+     from ragas.testset.synthesizers.abstract_query import (
+         AbstractQuerySynthesizer,
+         ComparativeAbstractQuerySynthesizer,
+     )
+     from ragas.testset.synthesizers.specific_query import SpecificQuerySynthesizer
+
+     abstract = AbstractQuerySynthesizer(llm=llm)
+     comparative = ComparativeAbstractQuerySynthesizer(llm=llm)
+     specific = SpecificQuerySynthesizer(llm=llm)
+
+     asyncio.run(
+         translate_prompts(
+             prompts=[
+                 abstract,
+                 comparative,
+                 specific,
+             ],
+             target_lang=language,
+             llm=llm,
+             adapt_instruction=True,
+         )
+     )
+     return [
+         (abstract, distribution['simple']),
+         (comparative, distribution['multi_context']),
+         (specific, distribution['reasoning']),
+     ]
+
+
+ def get_knowledge_graph(documents, transforms, local_file):
+     from ragas.testset.graph import KnowledgeGraph, Node, NodeType
+     from ragas.testset.transforms import apply_transforms
+
+     if os.path.exists(local_file):
+         logger.info(f'Loading knowledge graph from {local_file}')
+         return KnowledgeGraph.load(local_file)
+     # convert the documents to Ragas nodes
+     nodes = []
+     for doc in documents:
+         node = Node(
+             type=NodeType.DOCUMENT,
+             properties={
+                 'page_content': doc.page_content,
+                 'document_metadata': doc.metadata,
+             },
+         )
+         nodes.append(node)
+
+     kg = KnowledgeGraph(nodes=nodes)
+
+     # apply transforms and update the knowledge graph
+     apply_transforms(kg, transforms)
+
+     # save the knowledge graph
+     output_path = os.path.dirname(local_file)
+     os.makedirs(output_path, exist_ok=True)
+     kg.save(local_file)
+     logger.info(f'Knowledge graph saved to {local_file}')
+     return kg
+
+
+ def load_data(file_path):
+     from langchain_community.document_loaders import UnstructuredFileLoader
+
+     loader = UnstructuredFileLoader(file_path, mode='elements')
+     data = loader.load()
+     return data
+
+
+ def generate_testset(args: TestsetGenerationArguments) -> None:
+
+     from ragas.testset import TestsetGenerator
+     from ragas import RunConfig
+
+     # load data
+     documents = load_data(args.docs)
+
+     # generator with models
+     generator_llm = LLM.load(**args.generator_llm)
+     embeddings = EmbeddingModel.load(**args.embeddings)
+
+     # Change resulting question type distribution
+     distributions = get_distribution(
+         LangchainLLMWrapper(generator_llm), args.distribution, args.language
+     )
+
+     # get transforms
+     transforms = get_transform(
+         LangchainLLMWrapper(generator_llm),
+         LangchainEmbeddingsWrapper(embeddings),
+         args.language,
+     )
+
+     # get knowledge graph
+     knowledge_graph = get_knowledge_graph(documents, transforms, args.knowledge_graph)
+
+     generator = TestsetGenerator.from_langchain(
+         generator_llm, embeddings, knowledge_graph
+     )
+
+     runconfig = RunConfig(
+         timeout=600, max_retries=3, max_wait=120, max_workers=1, log_tenacity=True
+     )
+     testset = generator.generate(
+         testset_size=args.test_size,
+         query_distribution=distributions,
+         run_config=runconfig,
+         with_debugging_logs=True,
+         raise_exceptions=True,
+     )
+
+     # save file
+     testset_df = testset.to_pandas()
+     output_path = os.path.dirname(args.output_file)
+     os.makedirs(output_path, exist_ok=True)
+     testset_df.to_json(
+         args.output_file, indent=4, index=False, orient='records', force_ascii=False
+     )
+
+     # get answer
+     testset_with_answer = get_answer(testset_df, generator_llm, args.language)
+     testset_with_answer.to_json(
+         args.output_file.replace('.json', '_with_answer.json'),
+         indent=4,
+         index=False,
+         orient='records',
+         force_ascii=False,
+     )
+
+
+ def get_answer(testset_df, generator_llm, language: None):
+     template = """You are an assistant for question-answering tasks.
+     Use the following pieces of retrieved context to answer the question.
+     If you don't know the answer, just say that you don't know. Answer in {language}.
+     Question: {question}
+     Context: {contexts}
+     Answer:
+     """
+
+     items = []
+     for i in tqdm(range(len(testset_df)), desc='Generating Answers'):
+         row = testset_df.iloc[i]
+         question = row['user_input']
+         contexts = '\n'.join(row['reference_contexts'])
+
+         # Combine question and contexts as input for the LLM
+         input_text = template.format(
+             language=language, question=question, contexts=contexts
+         )
+
+         # Generate the answer using the generator LLM
+         answer = generator_llm.invoke(input_text)
+         if isinstance(generator_llm, ChatOpenAI):
+             answer = answer.content
+         items.append(
+             {
+                 'user_input': question,
+                 'retrieved_contexts': row['reference_contexts'],
+                 'response': answer,
+                 'reference': row['reference'],
+             }
+         )
+
+     return pd.DataFrame.from_dict(items)
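For context, `generate_testset` above is the entry point wired up by the new RAG evaluation backend: it loads documents, builds or reloads a knowledge graph, synthesizes questions according to a weighted distribution, and writes two JSON files. A minimal sketch of the distribution mapping that `get_distribution()` expects (the weights here are illustrative, not package defaults):

```python
# Keys read by get_distribution(); each weight is paired with one synthesizer.
distribution = {
    'simple': 0.5,          # AbstractQuerySynthesizer
    'multi_context': 0.25,  # ComparativeAbstractQuerySynthesizer
    'reasoning': 0.25,      # SpecificQuerySynthesizer
}
# generate_testset() then writes <output_file> and, after answering each question
# with the generator LLM, <output_file> with '.json' replaced by '_with_answer.json'.
```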
evalscope/backend/rag_eval/ragas/tasks/translate_prompt.py ADDED
@@ -0,0 +1,72 @@
+ import os
+ import asyncio
+ from typing import List
+ from ragas.prompt import PromptMixin
+ from ragas.llms import BaseRagasLLM
+ from ragas.utils import RAGAS_SUPPORTED_LANGUAGE_CODES
+ from evalscope.utils.logger import get_logger
+
+
+ logger = get_logger()
+
+
+ async def translate_prompt(
+     prompt_user: PromptMixin,
+     target_lang: str,
+     llm: BaseRagasLLM,
+     adapt_instruction: bool = False,
+ ):
+     if target_lang not in RAGAS_SUPPORTED_LANGUAGE_CODES:
+         logger.warning(
+             f'{target_lang} is not in supported language: {list(RAGAS_SUPPORTED_LANGUAGE_CODES)}'
+         )
+         return
+
+     if not issubclass(type(prompt_user), PromptMixin):
+         logger.info(f"{prompt_user} is not a PromptMixin, don't translate it")
+         return
+
+     class_name = prompt_user.__class__.__name__
+     current_dir = os.path.dirname(__file__)
+     prompt_dir = os.path.abspath(
+         os.path.join(current_dir, f'../prompts/{target_lang}/{class_name}')
+     )
+     os.makedirs(prompt_dir, exist_ok=True)
+
+     try:
+         loader_prompts = prompt_user.load_prompts(prompt_dir, target_lang)
+         prompt_user.set_prompts(**loader_prompts)
+         logger.info(f'Load existing prompts from {prompt_dir}')
+         return
+     except FileNotFoundError:
+         logger.info(f'Not find existing prompts {class_name}, generate new prompts.')
+
+     logger.info(f'Translating prompts to {target_lang}')
+     adapted_prompts = await prompt_user.adapt_prompts(
+         language=target_lang, llm=llm, adapt_instruction=adapt_instruction
+     )
+     prompt_user.set_prompts(**adapted_prompts)
+     try:
+         prompt_user.save_prompts(prompt_dir)
+     except FileExistsError:
+         logger.info(f'Find existing prompt {class_name}, skip saving.')
+     logger.info(f'Save new prompts to {prompt_dir}')
+
+     return
+
+
+ async def translate_prompts(
+     prompts: List[PromptMixin],
+     target_lang: str,
+     llm: BaseRagasLLM,
+     adapt_instruction: bool = False,
+ ):
+     if target_lang and target_lang != 'english':
+         await asyncio.gather(
+             *[
+                 translate_prompt(prompt, target_lang, llm, adapt_instruction)
+                 for prompt in prompts
+             ]
+         )
+
+     logger.info('Translate prompts finished')
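This translation helper is what lets the RAGAS prompts be adapted to a non-English target language and cached under `../prompts/<lang>/<ClassName>`. A usage sketch mirroring how `testset_generation.py` calls it, assuming the LLM wrapper is constructed elsewhere:

```python
import asyncio

from ragas.testset.transforms.extractors import SummaryExtractor
from evalscope.backend.rag_eval.ragas.tasks.translate_prompt import translate_prompts

# `wrapped_llm` is assumed to be a LangchainLLMWrapper around the generator model.
summary_extractor = SummaryExtractor(llm=wrapped_llm)

asyncio.run(
    translate_prompts(
        prompts=[summary_extractor],  # each entry must be a PromptMixin
        target_lang='chinese',        # must appear in RAGAS_SUPPORTED_LANGUAGE_CODES
        llm=wrapped_llm,
        adapt_instruction=True,
    )
)
# For target_lang 'english' (or empty) the call is a no-op apart from the final log line.
```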
evalscope/backend/vlm_eval_kit/backend_manager.py CHANGED
@@ -4,7 +4,6 @@ from evalscope.backend.base import BackendManager
  from evalscope.utils.logger import get_logger
  from functools import partial
  import subprocess
- from dataclasses import dataclass
  import copy

  logger = get_logger()
evalscope/backend/vlm_eval_kit/custom_dataset.py CHANGED
@@ -8,7 +8,7 @@ class CustomDataset:

      def load_data(self, dataset):
          # customize the loading of the dataset
-         data_path = os.path.join("~/LMUData", f'{dataset}.tsv')
+         data_path = os.path.join(os.path.expanduser("~/LMUData"), f'{dataset}.tsv')
          return load(data_path)


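The one-line change above matters because `os.path.join` never expands `~`; the old code produced a literal `~/LMUData/...` path relative to the working directory. A quick illustration:

```python
import os

os.path.join("~/LMUData", "demo.tsv")
# -> '~/LMUData/demo.tsv'  (literal tilde, usually not an existing directory)

os.path.join(os.path.expanduser("~/LMUData"), "demo.tsv")
# -> e.g. '/home/user/LMUData/demo.tsv'
```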
evalscope/evaluator/evaluator.py CHANGED
@@ -174,6 +174,7 @@ class Evaluator(object):
          """
          assert self.data_adapter is not None, 'data_adapter must be provided when calling func get_answers() !'
          assert self.model_adapter is not None, 'model must be provided when calling func get_answers() !'
+         assert len(prompts_list) > 0, 'prompts_list must not be empty when calling func get_answers() !'

          answers_list = []
          pred_dir: str = self.outputs_structure.get(OutputsStructure.PREDICTIONS_DIR)
evalscope/models/api/openai_api.py CHANGED
@@ -76,12 +76,12 @@ class OpenaiApi:
          data = json.dumps(data, ensure_ascii=False)

          if self.verbose:
-             print(f'>>data in generate_simple: {data}')
+             logger.info(f'>>data in generate_simple: {data}')

          resp = requests.post(self.url, headers=header, data=data)
          resp = resp.json()
          if self.verbose:
-             print(f'>>resp in generate_simple: {resp}')
+             logger.info(f'>>resp in generate_simple: {resp}')

          if self.logprobs:
              return resp['choices']
evalscope/perf/http_client.py CHANGED
@@ -209,7 +209,7 @@ async def dispatch_requests_worker(request_queue: asyncio.Queue, args):
              prompt = f.read()
          else:
              prompt = args.prompt
-         messages = {'role': 'user', 'content': prompt}
+         messages = [{'role': 'user', 'content': prompt}]
          request = query_generator.build_request(messages, query_parameters)
          if args.number is None:
              await request_queue.put(request)
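OpenAI-compatible chat endpoints expect `messages` to be a list of role/content objects, so the previous bare dict built an invalid request body. The corrected shape, sketched with a placeholder prompt:

```python
prompt = 'Hello!'  # placeholder; in the perf tool this comes from args.prompt or a file

request_body = {
    'messages': [{'role': 'user', 'content': prompt}],  # a list, not a bare dict
}
```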
evalscope/perf/openai_api.py CHANGED
@@ -39,6 +39,8 @@ class OpenaiPlugin(ApiPluginBase):
          try:
              if param.query_template is not None:
                  query = json.loads(param.query_template)
+                 if 'stream' in query.keys():
+                     param.stream = query['stream']
                  query['messages'] = messages  # replace template messages with input messages.
                  return self.__compose_query_from_parameter(query, param)
              else:
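With the two added lines, a `stream` field in the user-supplied query template is now mirrored onto the request parameters instead of being silently ignored. A sketch of such a template (the field values are illustrative):

```python
import json

query_template = '{"stream": true, "temperature": 0.0, "messages": []}'

query = json.loads(query_template)
if 'stream' in query.keys():
    # this is the value the plugin now copies onto param.stream
    print(query['stream'])  # True
```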
evalscope/run.py CHANGED
@@ -207,6 +207,10 @@ def run_task(task_cfg: Union[str, dict, TaskConfig, List[TaskConfig]]) -> Union[
          from evalscope.backend.vlm_eval_kit import VLMEvalKitBackendManager
          vlm_eval_kit_backend_manager = VLMEvalKitBackendManager(config=eval_config)
          vlm_eval_kit_backend_manager.run()
+     elif eval_backend == EvalBackend.RAG_EVAL.value:
+         from evalscope.backend.rag_eval import RAGEvalBackendManager
+         rag_eval_backend_manager = RAGEvalBackendManager(config=eval_config)
+         rag_eval_backend_manager.run()
      # TODO: Add other evaluation backends
      elif eval_backend == EvalBackend.THIRD_PARTY.value:
          raise NotImplementedError(f'Not implemented for evaluation backend {eval_backend}')
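This branch makes the new RAG evaluation backend reachable from `run_task`. A hedged sketch of selecting it from a task config dict; the `eval_config` contents depend on the new rag_eval arguments modules and are left as a placeholder here:

```python
from evalscope.run import run_task

task_cfg = {
    'eval_backend': 'RAGEval',  # EvalBackend.RAG_EVAL.value
    'eval_config': {},          # placeholder: CMTEB / CLIP-benchmark / RAGAS settings
}
run_task(task_cfg)
```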
evalscope/utils/logger.py CHANGED
@@ -1,18 +1,20 @@
  # Copyright (c) Alibaba, Inc. and its affiliates.

+ import importlib.util as iutil
  import logging
  from typing import Optional

  init_loggers = {}
+ format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+ formatter = logging.Formatter(format)

- formatter = logging.Formatter(
-     '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ logging.basicConfig(format=format, level=logging.INFO)


- def get_logger(log_file: Optional[str] = None,
-                log_level: int = logging.INFO,
-                file_mode: str = 'w'):
-     """ Get logging logger
+ def get_logger(
+     log_file: Optional[str] = None, log_level: int = logging.INFO, file_mode: str = "w"
+ ):
+     """Get logging logger

      Args:
          log_file: Log filename, if specified, file handler will be added to
@@ -22,21 +24,39 @@ def get_logger(log_file: Optional[str] = None,
              specified (if filemode is unspecified, it defaults to 'w').
      """

-     logger_name = __name__.split('.')[0]
+     logger_name = __name__.split(".")[0]
      logger = logging.getLogger(logger_name)
-
+     logger.propagate = False
      if logger_name in init_loggers:
          add_file_handler_if_needed(logger, log_file, file_mode, log_level)
+         if logger.level != log_level:
+             logger.setLevel(log_level)
          return logger

-     for handler in logger.root.handlers:
-         if type(handler) is logging.StreamHandler:
-             handler.setLevel(logging.ERROR)
+     # handle duplicate logs to the console
+     # Starting in 1.8.0, PyTorch DDP attaches a StreamHandler <stderr> (NOTSET)
+     # to the root logger. As logger.propagate is True by default, this root
+     # level handler causes logging messages from rank>0 processes to
+     # unexpectedly show up on the console, creating much unwanted clutter.
+     # To fix this issue, we set the root logger's StreamHandler, if any, to log
+     # at the ERROR level.
+     torch_dist = False
+     is_worker0 = True
+     if iutil.find_spec("torch") is not None:
+         from modelscope.utils.torch_utils import is_dist, is_master
+
+         torch_dist = is_dist()
+         is_worker0 = is_master()
+
+     if torch_dist:
+         for handler in logger.root.handlers:
+             if type(handler) is logging.StreamHandler:
+                 handler.setLevel(logging.ERROR)

      stream_handler = logging.StreamHandler()
      handlers = [stream_handler]

-     if log_file is not None:
+     if is_worker0 and log_file is not None:
          file_handler = logging.FileHandler(log_file, file_mode)
          handlers.append(file_handler)

@@ -45,7 +65,10 @@ def get_logger(log_file: Optional[str] = None,
          handler.setLevel(log_level)
          logger.addHandler(handler)

-     logger.setLevel(log_level)
+     if is_worker0:
+         logger.setLevel(log_level)
+     else:
+         logger.setLevel(logging.ERROR)

      init_loggers[logger_name] = True

@@ -57,7 +80,14 @@ def add_file_handler_if_needed(logger, log_file, file_mode, log_level):
          if isinstance(handler, logging.FileHandler):
              return

-     if log_file is not None:
+     if iutil.find_spec("torch") is not None:
+         from modelscope.utils.torch_utils import is_master
+
+         is_worker0 = is_master()
+     else:
+         is_worker0 = True
+
+     if is_worker0 and log_file is not None:
          file_handler = logging.FileHandler(log_file, file_mode)
          file_handler.setFormatter(formatter)
          file_handler.setLevel(log_level)
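The reworked logger now configures the root logger via `basicConfig`, stops propagation, re-applies the requested level on repeat calls, and, when running under torch distributed (detected through modelscope's helpers), restricts file handlers and non-ERROR console output to the rank-0 worker. Typical usage is unchanged; a small sketch with an assumed log path:

```python
import logging

from evalscope.utils.logger import get_logger

logger = get_logger(log_file='outputs/eval.log', log_level=logging.DEBUG)
logger.debug('written to console and outputs/eval.log on worker 0')

# A later call reuses the cached logger but still honors the requested level.
logger = get_logger(log_level=logging.INFO)
```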
evalscope/utils/task_utils.py CHANGED
@@ -11,6 +11,9 @@ class EvalBackend(Enum):

      # Use VLM Eval Kit as the multi-modal model evaluation backend
      VLM_EVAL_KIT = 'VLMEvalKit'
+
+     # Use RAGEval as the RAG evaluation backend
+     RAG_EVAL = 'RAGEval'

      # Use third-party evaluation backend/modules
      THIRD_PARTY = 'ThirdParty'
evalscope/version.py CHANGED
@@ -1,4 +1,4 @@
  # Copyright (c) Alibaba, Inc. and its affiliates.

- __version__ = '0.5.5rc1'
- __release_datetime__ = '2024-09-29 08:00:00'
+ __version__ = "0.6.0"
+ __release_datetime__ = "2024-11-08 11:59:59"