evalscope 0.8.0__py3-none-any.whl → 0.8.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of evalscope might be problematic.
- evalscope/backend/base.py +1 -1
- evalscope/backend/rag_eval/utils/clip.py +2 -2
- evalscope/backend/rag_eval/utils/embedding.py +1 -1
- evalscope/benchmarks/general_qa/general_qa_adapter.py +1 -1
- evalscope/benchmarks/gsm8k/gsm8k_adapter.py +2 -1
- evalscope/benchmarks/hellaswag/hellaswag_adapter.py +2 -1
- evalscope/benchmarks/humaneval/humaneval_adapter.py +193 -7
- evalscope/benchmarks/race/race_adapter.py +2 -1
- evalscope/config.py +38 -2
- evalscope/constants.py +24 -38
- evalscope/evaluator/__init__.py +0 -1
- evalscope/evaluator/evaluator.py +6 -4
- evalscope/evaluator/rating_eval.py +1 -1
- evalscope/evaluator/reviewer/auto_reviewer.py +2 -1
- evalscope/models/model_adapter.py +1 -1
- evalscope/perf/arguments.py +3 -1
- evalscope/perf/benchmark.py +3 -3
- evalscope/perf/main.py +5 -6
- evalscope/perf/plugin/api/openai_api.py +53 -49
- evalscope/perf/plugin/registry.py +3 -3
- evalscope/perf/utils/benchmark_util.py +4 -4
- evalscope/perf/utils/db_util.py +66 -22
- evalscope/perf/utils/local_server.py +4 -1
- evalscope/run.py +45 -82
- evalscope/run_arena.py +2 -1
- evalscope/summarizer.py +14 -26
- evalscope/third_party/longbench_write/eval.py +2 -1
- evalscope/third_party/longbench_write/longbench_write.py +2 -1
- evalscope/third_party/longbench_write/tools/data_etl.py +1 -1
- evalscope/third_party/toolbench_static/toolbench_static.py +2 -1
- evalscope/tools/combine_reports.py +2 -4
- evalscope/tools/rewrite_eval_results.py +1 -1
- evalscope/utils/__init__.py +1 -0
- evalscope/utils/chat_service.py +1 -1
- evalscope/utils/io_utils.py +162 -0
- evalscope/utils/logger.py +8 -0
- evalscope/utils/utils.py +0 -175
- evalscope/version.py +2 -2
- {evalscope-0.8.0.dist-info → evalscope-0.8.2.dist-info}/METADATA +15 -3
- {evalscope-0.8.0.dist-info → evalscope-0.8.2.dist-info}/RECORD +47 -67
- tests/cli/test_run.py +11 -12
- tests/perf/test_perf.py +3 -2
- tests/vlm/test_vlmeval.py +3 -2
- evalscope/backend/rag_eval/ragas/prompts/chinese/AnswerCorrectness/correctness_prompt_chinese.json +0 -87
- evalscope/backend/rag_eval/ragas/prompts/chinese/AnswerCorrectness/long_form_answer_prompt_chinese.json +0 -36
- evalscope/backend/rag_eval/ragas/prompts/chinese/AnswerRelevancy/question_generation_chinese.json +0 -26
- evalscope/backend/rag_eval/ragas/prompts/chinese/ContextPrecision/context_precision_prompt_chinese.json +0 -41
- evalscope/backend/rag_eval/ragas/prompts/chinese/CustomNodeFilter/scoring_prompt_chinese.json +0 -7
- evalscope/backend/rag_eval/ragas/prompts/chinese/Faithfulness/nli_statements_message_chinese.json +0 -60
- evalscope/backend/rag_eval/ragas/prompts/chinese/Faithfulness/statement_prompt_chinese.json +0 -36
- evalscope/backend/rag_eval/ragas/prompts/chinese/HeadlinesExtractor/prompt_chinese.json +0 -24
- evalscope/backend/rag_eval/ragas/prompts/chinese/MultiHopAbstractQuerySynthesizer/concept_combination_prompt_chinese.json +0 -35
- evalscope/backend/rag_eval/ragas/prompts/chinese/MultiHopAbstractQuerySynthesizer/generate_query_reference_prompt_chinese.json +0 -30
- evalscope/backend/rag_eval/ragas/prompts/chinese/MultiHopAbstractQuerySynthesizer/theme_persona_matching_prompt_chinese.json +0 -39
- evalscope/backend/rag_eval/ragas/prompts/chinese/MultiHopSpecificQuerySynthesizer/generate_query_reference_prompt_chinese.json +0 -30
- evalscope/backend/rag_eval/ragas/prompts/chinese/MultiHopSpecificQuerySynthesizer/theme_persona_matching_prompt_chinese.json +0 -39
- evalscope/backend/rag_eval/ragas/prompts/chinese/MultiModalFaithfulness/faithfulness_prompt_chinese.json +0 -34
- evalscope/backend/rag_eval/ragas/prompts/chinese/MultiModalRelevance/relevance_prompt_chinese.json +0 -36
- evalscope/backend/rag_eval/ragas/prompts/chinese/NERExtractor/prompt_chinese.json +0 -25
- evalscope/backend/rag_eval/ragas/prompts/chinese/SingleHopSpecificQuerySynthesizer/generate_query_reference_prompt_chinese.json +0 -24
- evalscope/backend/rag_eval/ragas/prompts/chinese/SingleHopSpecificQuerySynthesizer/theme_persona_matching_prompt_chinese.json +0 -39
- evalscope/backend/rag_eval/ragas/prompts/chinese/SummaryExtractor/prompt_chinese.json +0 -16
- evalscope/backend/rag_eval/ragas/prompts/chinese/ThemesExtractor/prompt_chinese.json +0 -24
- evalscope/evaluator/humaneval_evaluator.py +0 -158
- {evalscope-0.8.0.dist-info → evalscope-0.8.2.dist-info}/LICENSE +0 -0
- {evalscope-0.8.0.dist-info → evalscope-0.8.2.dist-info}/WHEEL +0 -0
- {evalscope-0.8.0.dist-info → evalscope-0.8.2.dist-info}/entry_points.txt +0 -0
- {evalscope-0.8.0.dist-info → evalscope-0.8.2.dist-info}/top_level.txt +0 -0
```diff
@@ -4,7 +4,7 @@ import time
 
 from evalscope.models.custom import CustomModel
 from evalscope.run import run_task
-from evalscope.utils import yaml_to_dict
+from evalscope.utils.io_utils import yaml_to_dict
 from evalscope.utils.logger import get_logger
 
 logger = get_logger()
```
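This hunk reflects the 0.8.2 relocation of the file I/O helpers: `yaml_to_dict` and its companions moved out of `evalscope.utils` into the new `evalscope.utils.io_utils` module (added below). A minimal migration sketch for downstream code; the config filename is illustrative:

```python
# evalscope <= 0.8.0: helper exposed from the package root
# from evalscope.utils import yaml_to_dict

# evalscope >= 0.8.2: helper lives in the new io_utils module
from evalscope.utils.io_utils import yaml_to_dict

task_cfg = yaml_to_dict('task_config.yaml')  # illustrative config path
```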
evalscope/utils/__init__.py CHANGED

evalscope/utils/chat_service.py CHANGED

```diff
@@ -43,7 +43,7 @@ class DeltaMessage(BaseModel):
 
 class ChatCompletionRequest(BaseModel):
     model: str
-    messages: List[ChatMessage]
+    messages: Union[List[ChatMessage], str]
     temperature: Optional[float] = None
     top_p: Optional[float] = None
     max_tokens: Optional[int] = 2048
```
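`ChatCompletionRequest.messages` is widened from `List[ChatMessage]` to `Union[List[ChatMessage], str]`, so the chat service also accepts a raw prompt string. A minimal sketch of both payload shapes; the `role`/`content` fields on `ChatMessage` and the model name are assumptions, since the diff only shows the changed field:

```python
from typing import List, Optional, Union
from pydantic import BaseModel

class ChatMessage(BaseModel):  # assumed fields, OpenAI-style
    role: str
    content: str

class ChatCompletionRequest(BaseModel):
    model: str
    messages: Union[List[ChatMessage], str]
    temperature: Optional[float] = None

# Both shapes validate after the change:
req_chat = ChatCompletionRequest(model='my-model', messages=[ChatMessage(role='user', content='hi')])
req_text = ChatCompletionRequest(model='my-model', messages='hi')  # plain-string prompt
```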
evalscope/utils/io_utils.py ADDED

```diff
@@ -0,0 +1,162 @@
+import json
+import jsonlines as jsonl
+import os
+import yaml
+
+from evalscope.constants import DumpMode
+from evalscope.utils.logger import get_logger
+
+logger = get_logger()
+
+
+class OutputsStructure:
+    LOGS_DIR = 'logs'
+    PREDICTIONS_DIR = 'predictions'
+    REVIEWS_DIR = 'reviews'
+    REPORTS_DIR = 'reports'
+    CONFIGS_DIR = 'configs'
+
+    def __init__(self, outputs_dir: str, is_make=True):
+        self.outputs_dir = outputs_dir
+        self.is_make = is_make
+        self._dirs = {
+            'logs_dir': None,
+            'predictions_dir': None,
+            'reviews_dir': None,
+            'reports_dir': None,
+            'configs_dir': None
+        }
+
+    def _get_dir(self, attr_name, dir_name):
+        if self._dirs[attr_name] is None:
+            dir_path = os.path.join(self.outputs_dir, dir_name)
+            if self.is_make:
+                os.makedirs(dir_path, exist_ok=True)
+            self._dirs[attr_name] = dir_path
+        return self._dirs[attr_name]
+
+    @property
+    def logs_dir(self):
+        return self._get_dir('logs_dir', OutputsStructure.LOGS_DIR)
+
+    @property
+    def predictions_dir(self):
+        return self._get_dir('predictions_dir', OutputsStructure.PREDICTIONS_DIR)
+
+    @property
+    def reviews_dir(self):
+        return self._get_dir('reviews_dir', OutputsStructure.REVIEWS_DIR)
+
+    @property
+    def reports_dir(self):
+        return self._get_dir('reports_dir', OutputsStructure.REPORTS_DIR)
+
+    @property
+    def configs_dir(self):
+        return self._get_dir('configs_dir', OutputsStructure.CONFIGS_DIR)
+
+
+def jsonl_to_list(jsonl_file):
+    """
+    Read jsonl file to list.
+
+    Args:
+        jsonl_file: jsonl file path.
+
+    Returns:
+        list: list of lines. Each line is a dict.
+    """
+    res_list = []
+    with jsonl.open(jsonl_file, mode='r') as reader:
+        for line in reader.iter(type=dict, allow_none=True, skip_invalid=False):
+            res_list.append(line)
+    return res_list
+
+
+def jsonl_to_reader(jsonl_file):
+    """
+    Read jsonl file to reader object.
+
+    Args:
+        jsonl_file: jsonl file path.
+
+    Returns:
+        reader: jsonl reader object.
+    """
+    with jsonl.open(jsonl_file, mode='r') as reader:
+        return reader
+
+
+def dump_jsonl_data(data_list, jsonl_file, dump_mode=DumpMode.OVERWRITE):
+    """
+    Dump data to jsonl file.
+
+    Args:
+        data_list: data list to be dumped. [{'a': 'aaa'}, ...]
+        jsonl_file: jsonl file path.
+        dump_mode: dump mode. It can be 'overwrite' or 'append'.
+    """
+    if not jsonl_file:
+        raise ValueError('output file must be provided.')
+
+    jsonl_file = os.path.expanduser(jsonl_file)
+
+    if not isinstance(data_list, list):
+        data_list = [data_list]
+
+    if dump_mode == DumpMode.OVERWRITE:
+        dump_mode = 'w'
+    elif dump_mode == DumpMode.APPEND:
+        dump_mode = 'a'
+    with jsonl.open(jsonl_file, mode=dump_mode) as writer:
+        writer.write_all(data_list)
+
+
+def jsonl_to_csv():
+    pass
+
+
+def yaml_to_dict(yaml_file) -> dict:
+    """
+    Read yaml file to dict.
+    """
+    with open(yaml_file, 'r') as f:
+        try:
+            stream = yaml.safe_load(f)
+        except yaml.YAMLError as e:
+            logger.error(f'{e}')
+            raise e
+
+    return stream
+
+
+def dict_to_yaml(d: dict, yaml_file: str):
+    """
+    Dump dict to yaml file.
+    """
+    with open(yaml_file, 'w') as f:
+        yaml.dump(d, f, default_flow_style=False)
+
+
+def json_to_dict(json_file) -> dict:
+    """
+    Read json file to dict.
+    """
+    with open(json_file, 'r') as f:
+        try:
+            stream = json.load(f)
+        except json.JSONDecodeError as e:
+            logger.error(f'{e}')
+            raise e
+
+    return stream
+
+
+def are_paths_same(path1, path2):
+    """
+    Check if two paths are the same.
+    """
+    real_path1 = os.path.realpath(os.path.abspath(os.path.expanduser(path1)))
+    real_path2 = os.path.realpath(os.path.abspath(os.path.expanduser(path2)))
+
+    return real_path1 == real_path2
```
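Two details in the new module are worth noting: `OutputsStructure` creates each output subdirectory lazily, on first property access (via `_get_dir`), and the jsonl helpers wrap `jsonlines` for simple round trips. A usage sketch; the file name is illustrative:

```python
from evalscope.constants import DumpMode
from evalscope.utils.io_utils import dump_jsonl_data, jsonl_to_list

records = [{'id': 1, 'answer': '42'}, {'id': 2, 'answer': '7'}]

# OVERWRITE truncates the target file; APPEND adds to it.
dump_jsonl_data(records, 'preds.jsonl', dump_mode=DumpMode.OVERWRITE)
# A single dict is wrapped into a one-element list before writing.
dump_jsonl_data({'id': 3, 'answer': '0'}, 'preds.jsonl', dump_mode=DumpMode.APPEND)

assert len(jsonl_to_list('preds.jsonl')) == 3
```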
evalscope/utils/logger.py CHANGED

```diff
@@ -75,6 +75,14 @@ def get_logger(log_file: Optional[str] = None, log_level: int = DEFAULT_LEVEL, f
     return logger
 
 
+def configure_logging(debug: bool, log_file: Optional[str] = None):
+    """Configure logging level based on the debug flag."""
+    if log_file:
+        get_logger(log_file=log_file, force=True)
+    if debug:
+        get_logger(log_level=logging.DEBUG, force=True)
+
+
 def add_file_handler_if_needed(logger, log_file, file_mode, log_level):
     for handler in logger.handlers:
         if isinstance(handler, logging.FileHandler):
```
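`configure_logging` is a thin convenience wrapper over `get_logger`: one call attaches a file handler and/or raises the level to DEBUG (`force=True` presumably re-initializes the existing logger). A usage sketch; the log path is hypothetical and its directory is assumed to exist:

```python
from evalscope.utils.logger import configure_logging

# Route logs to a file and raise verbosity to DEBUG in one call.
configure_logging(debug=True, log_file='outputs/logs/eval.log')  # hypothetical path
```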
evalscope/utils/utils.py CHANGED

```diff
@@ -5,19 +5,13 @@ import functools
 import hashlib
 import importlib
 import importlib.util
-import json
-import jsonlines as jsonl
 import numpy as np
 import os
 import random
 import re
-import sys
 import torch
-import torch.nn.functional as F
-import yaml
 from typing import Any, Dict, List, Tuple, Union
 
-from evalscope.constants import DumpMode
 from evalscope.utils.logger import get_logger
 
 logger = get_logger()
@@ -36,102 +30,6 @@ def test_level_list():
     return TEST_LEVEL_LIST
 
 
-def jsonl_to_list(jsonl_file):
-    """
-    Read jsonl file to list.
-
-    Args:
-        jsonl_file: jsonl file path.
-
-    Returns:
-        list: list of lines. Each line is a dict.
-    """
-    res_list = []
-    with jsonl.open(jsonl_file, mode='r') as reader:
-        for line in reader.iter(type=dict, allow_none=True, skip_invalid=False):
-            res_list.append(line)
-    return res_list
-
-
-def jsonl_to_reader(jsonl_file):
-    """
-    Read jsonl file to reader object.
-
-    Args:
-        jsonl_file: jsonl file path.
-
-    Returns:
-        reader: jsonl reader object.
-    """
-    with jsonl.open(jsonl_file, mode='r') as reader:
-        return reader
-
-
-def jsonl_to_csv():
-    pass
-
-
-def dump_jsonl_data(data_list, jsonl_file, dump_mode=DumpMode.OVERWRITE):
-    """
-    Dump data to jsonl file.
-
-    Args:
-        data_list: data list to be dumped. [{'a': 'aaa'}, ...]
-        jsonl_file: jsonl file path.
-        dump_mode: dump mode. It can be 'overwrite' or 'append'.
-    """
-    if not jsonl_file:
-        raise ValueError('output file must be provided.')
-
-    jsonl_file = os.path.expanduser(jsonl_file)
-
-    if not isinstance(data_list, list):
-        data_list = [data_list]
-
-    if dump_mode == DumpMode.OVERWRITE:
-        dump_mode = 'w'
-    elif dump_mode == DumpMode.APPEND:
-        dump_mode = 'a'
-    with jsonl.open(jsonl_file, mode=dump_mode) as writer:
-        writer.write_all(data_list)
-
-
-def yaml_to_dict(yaml_file) -> dict:
-    """
-    Read yaml file to dict.
-    """
-    with open(yaml_file, 'r') as f:
-        try:
-            stream = yaml.safe_load(f)
-        except yaml.YAMLError as e:
-            logger.error(f'{e}')
-            raise e
-
-    return stream
-
-
-def dict_to_yaml(d: dict, yaml_file: str):
-    """
-    Dump dict to yaml file.
-    """
-    with open(yaml_file, 'w') as f:
-        yaml.dump(d, f, default_flow_style=False)
-
-
-def json_to_dict(json_file) -> dict:
-    """
-    Read json file to dict.
-    """
-    with open(json_file, 'r') as f:
-        try:
-            stream = json.load(f)
-        except json.JSONDecodeError as e:
-            logger.error(f'{e}')
-            raise e
-
-    return stream
-
-
 def get_obj_from_cfg(eval_class_ref: Any, *args, **kwargs) -> Any:
     module_name, spliter, cls_name = eval_class_ref.partition(':')
 
@@ -300,18 +198,6 @@ class ResponseParser:
         return ''
 
 
-def make_outputs_dir(root_dir: str, datasets: list, model_id: str, model_revision: str):
-    if not model_id:
-        model_id = 'default'
-    model_id = model_id.replace('/', '_')
-
-    if not model_revision:
-        model_revision = 'default'
-
-    outputs_dir = os.path.join(root_dir, model_id, model_revision, f"eval_{'-'.join(datasets)}")
-
-    return outputs_dir
-
 
 def import_module_util(import_path_prefix: str, module_name: str, members_to_import: list) -> dict:
     """
@@ -355,67 +241,6 @@ def normalize_score(score: Union[float, dict], keep_num: int = 4) -> Union[float
     return score
 
 
-def split_str_parts_by(text: str, delimiters: List[str]):
-    """Split the text field into parts.
-
-    Args:
-        text: A text to be split.
-        delimiters: The delimiters.
-
-    Returns:
-        The split text in list of dicts.
-    """
-    all_start_chars = [d[0] for d in delimiters]
-    all_length = [len(d) for d in delimiters]
-
-    text_list = []
-    last_words = ''
-
-    while len(text) > 0:
-        for char_idx, char in enumerate(text):
-            match_index = [idx for idx, start_char in enumerate(all_start_chars) if start_char == char]
-            is_delimiter = False
-            for index in match_index:
-                if text[char_idx:char_idx + all_length[index]] == delimiters[index]:
-                    if last_words:
-                        if text_list:
-                            text_list[-1]['content'] = last_words
-                        else:
-                            text_list.append({'key': '', 'content': last_words})
-                    last_words = ''
-                    text_list.append({'key': delimiters[index]})
-                    text = text[char_idx + all_length[index]:]
-                    is_delimiter = True
-                    break
-            if not is_delimiter:
-                last_words += char
-            else:
-                break
-        if last_words == text:
-            text = ''
-
-    text_list[-1]['content'] = last_words
-    return text_list
-
-
-def get_bucket_sizes(max_length: int) -> List[int]:
-    return [max_length // 4 * (i + 1) for i in range(4)]
-
-
-def _get_closet_bucket(bucket_sizes, data_length):
-    """Select the one from bucket_sizes that is closest in distance to
-    data_length. This is required for TorchAcc.
-    """
-    cloest_length = sys.maxsize
-    for b in bucket_sizes:
-        if b == data_length or ((b < cloest_length) and (b > data_length)):
-            cloest_length = b
-
-    if cloest_length == sys.maxsize:
-        bucket_sizes.append(data_length)
-        cloest_length = data_length
-
-    return cloest_length
-
-
 def is_module_installed(module_name):
     try:
         importlib.import_module(module_name)
```
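Most of the helpers removed here (the jsonl/yaml/json functions) reappear verbatim in the new `evalscope.utils.io_utils` module above, so this is a move rather than a deletion. `make_outputs_dir` has no direct counterpart; the new `OutputsStructure` class appears to take over output-directory handling. A hedged before/after sketch, with illustrative arguments:

```python
# 0.8.0: utils.make_outputs_dir assembled a nested path by hand, e.g.
#   make_outputs_dir('outputs', ['gsm8k'], 'qwen/Qwen2-7B', 'master')
#   -> 'outputs/qwen_Qwen2-7B/master/eval_gsm8k'

# 0.8.2: a single root is handed to OutputsStructure, which creates its
# logs/predictions/reviews/reports/configs subdirectories lazily.
from evalscope.utils.io_utils import OutputsStructure

outputs = OutputsStructure(outputs_dir='outputs/qwen_eval')  # illustrative root
print(outputs.reports_dir)  # 'outputs/qwen_eval/reports', created on first access
```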
evalscope/version.py CHANGED

{evalscope-0.8.0.dist-info → evalscope-0.8.2.dist-info}/METADATA CHANGED

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: evalscope
-Version: 0.8.0
+Version: 0.8.2
 Summary: EvalScope: Lightweight LLMs Evaluation Framework
 Home-page: https://github.com/modelscope/evalscope
 Author: ModelScope team
```
```diff
@@ -84,7 +84,7 @@ Requires-Dist: transformers-stream-generator; extra == "all"
 Requires-Dist: ms-opencompass>=0.1.4; extra == "all"
 Requires-Dist: ms-vlmeval>=0.0.9; extra == "all"
 Requires-Dist: mteb==1.19.4; extra == "all"
-Requires-Dist: ragas==0.2.
+Requires-Dist: ragas==0.2.9; extra == "all"
 Requires-Dist: webdataset>0.2.0; extra == "all"
 Requires-Dist: aiohttp; extra == "all"
 Requires-Dist: fastapi; extra == "all"
```
```diff
@@ -129,7 +129,7 @@ Requires-Dist: transformers; extra == "perf"
 Requires-Dist: unicorn; extra == "perf"
 Provides-Extra: rag
 Requires-Dist: mteb==1.19.4; extra == "rag"
-Requires-Dist: ragas==0.2.
+Requires-Dist: ragas==0.2.9; extra == "rag"
 Requires-Dist: webdataset>0.2.0; extra == "rag"
 Provides-Extra: vlmeval
 Requires-Dist: ms-vlmeval>=0.0.9; extra == "vlmeval"
```
```diff
@@ -181,6 +181,8 @@ The framework accommodates multiple evaluation scenarios such as end-to-end RAG
   <br>EvalScope Framework.
 </p>
 
+<details><summary>Framework Description</summary>
+
 The architecture includes the following modules:
 1. **Model Adapter**: The model adapter is used to convert the outputs of specific models into the format required by the framework, supporting both API call models and locally run models.
 2. **Data Adapter**: The data adapter is responsible for converting and processing input data to meet various evaluation needs and formats.
@@ -194,6 +196,16 @@ The architecture includes the following modules:
 5. **Evaluation Report**: The final generated evaluation report summarizes the model's performance, which can be used for decision-making and further model optimization.
 6. **Visualization**: Visualization results help users intuitively understand evaluation results, facilitating analysis and comparison of different model performances.
 
+</details>
+
+## ☎ User Groups
+
+Please scan the QR code below to join our community groups:
+
+[Discord Group](https://discord.com/invite/D27yfEFVz5) | WeChat Group | DingTalk Group
+:-------------------------:|:-------------------------:|:-------------------------:
+<img src="docs/asset/discord_qr.jpg" width="160" height="160"> | <img src="docs/asset/wechat.png" width="160" height="160"> | <img src="docs/asset/dingding.png" width="160" height="160">
+
 
 ## 🎉 News
 - 🔥 **[2024.12.13]** Model evaluation optimization: no need to pass the `--template-type` parameter anymore; supports starting evaluation with `evalscope eval --args`. Refer to the [📖 User Guide](https://evalscope.readthedocs.io/en/latest/get_started/basic_usage.html) for more details.
```