evalscope 0.6.0__py3-none-any.whl → 0.6.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- evalscope/backend/opencompass/tasks/eval_datasets.py +1 -0
- evalscope/backend/rag_eval/cmteb/tasks/Clustering.py +96 -96
- evalscope/backend/rag_eval/cmteb/tasks/Reranking.py +70 -71
- evalscope/backend/rag_eval/ragas/tasks/testset_generation.py +120 -100
- evalscope/backend/rag_eval/utils/__init__.py +0 -0
- evalscope/backend/rag_eval/utils/clip.py +149 -0
- evalscope/backend/rag_eval/utils/embedding.py +183 -0
- evalscope/backend/rag_eval/utils/llm.py +72 -0
- evalscope/backend/rag_eval/utils/tools.py +63 -0
- evalscope/metrics/bundled_rouge_score/rouge_scorer.py +1 -1
- evalscope/version.py +2 -2
- {evalscope-0.6.0.dist-info → evalscope-0.6.1.dist-info}/METADATA +14 -13
- {evalscope-0.6.0.dist-info → evalscope-0.6.1.dist-info}/RECORD +16 -11
- {evalscope-0.6.0.dist-info → evalscope-0.6.1.dist-info}/WHEEL +1 -1
- {evalscope-0.6.0.dist-info → evalscope-0.6.1.dist-info}/entry_points.txt +0 -0
- {evalscope-0.6.0.dist-info → evalscope-0.6.1.dist-info}/top_level.txt +0 -0
evalscope/backend/rag_eval/utils/llm.py ADDED
@@ -0,0 +1,72 @@
+import os
+from typing import Any, Dict, Iterator, List, Mapping, Optional
+from modelscope.utils.hf_util import GenerationConfig
+from langchain_core.callbacks.manager import CallbackManagerForLLMRun
+from langchain_core.language_models.llms import LLM as BaseLLM
+from evalscope.models.model_adapter import ChatGenerationModelAdapter
+from langchain_openai import ChatOpenAI
+
+
+class LLM:
+    @staticmethod
+    def load(**kw):
+        api_base = kw.get('api_base', None)
+        if api_base:
+            return ChatOpenAI(
+                model_name=kw.get('model_name', ''),
+                openai_api_base=api_base,
+                openai_api_key=kw.get('api_key', 'EMPTY'),
+            )
+        else:
+            return LocalLLM(**kw)
+
+
+class LocalLLM(BaseLLM):
+    """A custom LLM that loads a model from a given path and performs inference."""
+
+    model_name_or_path: str
+    model_revision: str = 'master'
+    template_type: str = 'default'
+    model_name: Optional[str]
+    model: Optional[ChatGenerationModelAdapter]
+    generation_config: Optional[Dict]
+
+    def __init__(self, **kw):
+        super().__init__(**kw)
+        self.model_name = os.path.basename(self.model_name_or_path)
+        self.model = ChatGenerationModelAdapter(
+            model_id=self.model_name_or_path,
+            model_revision=self.model_revision,
+            template_type=self.template_type,
+            generation_config=GenerationConfig(**self.generation_config) if self.generation_config else None,
+        )
+
+    def _call(
+        self,
+        prompt: str,
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> str:
+        """Run the LLM on the given input."""
+        infer_cfg = {'stop': stop}
+
+        response = self.model._model_generate(prompt, infer_cfg)
+        return response
+
+    @property
+    def _identifying_params(self) -> Dict[str, Any]:
+        """Return a dictionary of identifying parameters."""
+        return {
+            # The model name allows users to specify custom token counting
+            # rules in LLM monitoring applications (e.g., in LangSmith users
+            # can provide per token pricing for their model and monitor
+            # costs for the given LLM.)
+            'model_name': self.model_name,
+            'revision': self.model_revision,
+        }
+
+    @property
+    def _llm_type(self) -> str:
+        """Get the type of language model used by this chat model. Used for logging purposes only."""
+        return self.model_name
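The new `LLM.load` is a small factory: when `api_base` is supplied it returns a LangChain `ChatOpenAI` client pointed at that OpenAI-compatible endpoint; otherwise the kwargs fall through to `LocalLLM`, which wraps EvalScope's `ChatGenerationModelAdapter`. A minimal usage sketch (the endpoint, key, and model names below are placeholders, not values from the diff):

```python
from evalscope.backend.rag_eval.utils.llm import LLM

# Remote path: any kwargs that include api_base yield a ChatOpenAI client.
remote_llm = LLM.load(
    model_name='qwen2-7b-instruct',       # placeholder model name
    api_base='http://127.0.0.1:8000/v1',  # placeholder OpenAI-compatible endpoint
    api_key='EMPTY',
)

# Local path: without api_base, the kwargs initialize LocalLLM, which loads
# the checkpoint through ChatGenerationModelAdapter.
local_llm = LLM.load(model_name_or_path='qwen/Qwen2-0.5B-Instruct')

# LocalLLM subclasses LangChain's LLM base class, so invoke() routes into _call().
print(local_llm.invoke('Hello'))
```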
evalscope/backend/rag_eval/utils/tools.py ADDED
@@ -0,0 +1,63 @@
+import io
+import os
+import base64
+from modelscope import snapshot_download
+from evalscope.utils.logger import get_logger
+
+logger = get_logger()
+
+
+def PIL_to_bytes(image_format, **kwargs):
+    OPTIONS = {
+        "webp": dict(format="webp", lossless=True),
+        "png": dict(format="png"),
+        "jpg": dict(format="jpeg"),
+    }
+
+    def transform(image):
+        bytestream = io.BytesIO()
+        image.save(bytestream, **OPTIONS[image_format])
+        return bytestream.getvalue()
+
+    return transform
+
+
+def PIL_to_base64(image, **kwargs):
+    bytestream = io.BytesIO()
+    image.save(bytestream, format="jpeg")
+    return base64.b64encode(bytestream.getvalue()).decode("utf-8")
+
+
+def path_to_bytes(filepath):
+    with open(filepath, "rb") as fp:
+        return fp.read()
+
+
+def path_to_base64(filepath):
+    file_content = path_to_bytes(filepath)
+    return base64.b64encode(file_content).decode("utf-8")
+
+
+def ensure_dir(file_path):
+    os.makedirs(os.path.dirname(file_path), exist_ok=True)
+
+
+def save_to_jsonl(df, file_path):
+    ensure_dir(file_path)
+    df.to_json(file_path, orient="records", lines=True, force_ascii=False)
+
+
+def save_to_tsv(df, file_path):
+    ensure_dir(file_path)
+    df.to_csv(file_path, sep="\t", index=False)
+
+
+def download_model(model_id: str, revision: str):
+    """
+    default base dir: '~/.cache/modelscope/hub/model_id'
+    """
+    logger.info(f"Loading model {model_id} from modelscope")
+
+    model_path = snapshot_download(model_id=model_id, revision=revision)
+
+    return model_path
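The new `tools.py` bundles small I/O helpers: image-to-bytes/base64 converters, DataFrame writers that create parent directories first, and a ModelScope snapshot download. A short sketch of how they compose (the image and output path are illustrative):

```python
import pandas as pd
from PIL import Image

from evalscope.backend.rag_eval.utils.tools import PIL_to_base64, PIL_to_bytes, save_to_jsonl

image = Image.new('RGB', (64, 64), 'white')  # stand-in for a real image

# PIL_to_bytes returns a transform closure keyed by format: 'webp', 'png', or 'jpg'.
png_bytes = PIL_to_bytes('png')(image)

# PIL_to_base64 always re-encodes as JPEG before base64-encoding.
b64 = PIL_to_base64(image)

# save_to_jsonl calls ensure_dir first; note that ensure_dir assumes the
# path contains a directory component.
df = pd.DataFrame([{'id': 1, 'image_b64': b64}])
save_to_jsonl(df, 'outputs/example.jsonl')
```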
evalscope/metrics/bundled_rouge_score/rouge_scorer.py CHANGED
@@ -51,7 +51,7 @@ try:
     punkt_tab_url = 'https://modelscope-open.oss-cn-hangzhou.aliyuncs.com/open_data/nltk_data/punkt_tab.zip'
 
     if not os.path.exists(punkt_path):
-        os.system(f'wget -P {nltk_dir} {punkt_tab_url}')
+        os.system(f'wget --timeout=10 --tries=3 -P {nltk_dir} {punkt_tab_url}')
         os.system(f'unzip {punkt_path} -d {nltk_dir}')
     else:
         logger.info(f'{punkt_path} already exists, skipping download')
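The one-line change here hardens the NLTK `punkt_tab` fetch: `--timeout=10` caps how long wget waits on a stalled connection, and `--tries=3` retries failed attempts. For reference, a dependency-free sketch of the same timeout-and-retry behavior in pure Python (the function name and back-off policy are my own, not part of the package):

```python
import time
import urllib.request

def download_with_retries(url: str, dest: str, timeout: float = 10, tries: int = 3) -> None:
    """Fetch url into dest, mirroring wget's --timeout/--tries semantics."""
    for attempt in range(1, tries + 1):
        try:
            with urllib.request.urlopen(url, timeout=timeout) as resp:
                with open(dest, 'wb') as fp:
                    fp.write(resp.read())
            return
        except OSError:
            if attempt == tries:
                raise
            time.sleep(2 ** attempt)  # exponential back-off between attempts
```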
evalscope/version.py CHANGED

{evalscope-0.6.0.dist-info → evalscope-0.6.1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: evalscope
-Version: 0.6.0
+Version: 0.6.1
 Summary: EvalScope: Lightweight LLMs Evaluation Framework
 Home-page: https://github.com/modelscope/evalscope
 Author: ModelScope team
@@ -28,7 +28,7 @@ Requires-Dist: nltk>=3.9
 Requires-Dist: openai
 Requires-Dist: pandas
 Requires-Dist: plotly
-Requires-Dist: pyarrow
+Requires-Dist: pyarrow<=17.0.0
 Requires-Dist: pympler
 Requires-Dist: pyyaml
 Requires-Dist: regex
@@ -61,7 +61,7 @@ Requires-Dist: nltk>=3.9; extra == "all"
 Requires-Dist: openai; extra == "all"
 Requires-Dist: pandas; extra == "all"
 Requires-Dist: plotly; extra == "all"
-Requires-Dist: pyarrow; extra == "all"
+Requires-Dist: pyarrow<=17.0.0; extra == "all"
 Requires-Dist: pympler; extra == "all"
 Requires-Dist: pyyaml; extra == "all"
 Requires-Dist: regex; extra == "all"
@@ -80,10 +80,10 @@ Requires-Dist: transformers>=4.33; extra == "all"
 Requires-Dist: transformers-stream-generator; extra == "all"
 Requires-Dist: jieba; extra == "all"
 Requires-Dist: rouge-chinese; extra == "all"
-Requires-Dist: ms-opencompass>=0.1.
+Requires-Dist: ms-opencompass>=0.1.3; extra == "all"
 Requires-Dist: ms-vlmeval>=0.0.5; extra == "all"
-Requires-Dist: mteb
-Requires-Dist: ragas
+Requires-Dist: mteb==1.19.4; extra == "all"
+Requires-Dist: ragas==0.2.5; extra == "all"
 Requires-Dist: webdataset>0.2.0; extra == "all"
 Provides-Extra: inner
 Requires-Dist: absl-py; extra == "inner"
@@ -112,10 +112,10 @@ Requires-Dist: tqdm; extra == "inner"
 Requires-Dist: transformers<4.43,>=4.33; extra == "inner"
 Requires-Dist: transformers-stream-generator; extra == "inner"
 Provides-Extra: opencompass
-Requires-Dist: ms-opencompass>=0.1.
+Requires-Dist: ms-opencompass>=0.1.3; extra == "opencompass"
 Provides-Extra: rag
-Requires-Dist: mteb
-Requires-Dist: ragas
+Requires-Dist: mteb==1.19.4; extra == "rag"
+Requires-Dist: ragas==0.2.5; extra == "rag"
 Requires-Dist: webdataset>0.2.0; extra == "rag"
 Provides-Extra: vlmeval
 Requires-Dist: ms-vlmeval>=0.0.5; extra == "vlmeval"
@@ -139,6 +139,7 @@ Requires-Dist: ms-vlmeval>=0.0.5; extra == "vlmeval"
     <a href="https://evalscope.readthedocs.io/en/latest/">📖 Documents</a>
 <p>
 
+> ⭐ If you like this project, please click the "Star" button at the top right to support us. Your support is our motivation to keep going!
 
 ## 📋 Table of Contents
 - [Introduction](#introduction)
@@ -164,7 +165,7 @@ EvalScope is the official model evaluation and performance benchmarking framework
 The architecture includes the following modules:
 1. **Model Adapter**: The model adapter is used to convert the outputs of specific models into the format required by the framework, supporting both API call models and locally run models.
 2. **Data Adapter**: The data adapter is responsible for converting and processing input data to meet various evaluation needs and formats.
-3. **Evaluation Backend**: 
+3. **Evaluation Backend**:
    - **Native**: EvalScope’s own **default evaluation framework**, supporting various evaluation modes, including single model evaluation, arena mode, baseline model comparison mode, etc.
    - **OpenCompass**: Supports [OpenCompass](https://github.com/open-compass/opencompass) as the evaluation backend, providing advanced encapsulation and task simplification, allowing you to submit tasks for evaluation more easily.
    - **VLMEvalKit**: Supports [VLMEvalKit](https://github.com/open-compass/VLMEvalKit) as the evaluation backend, enabling easy initiation of multi-modal evaluation tasks, supporting various multi-modal models and datasets.
@@ -251,7 +252,7 @@ You can execute this command from any directory:
 python -m evalscope.run \
  --model qwen/Qwen2-0.5B-Instruct \
  --template-type qwen \
- --datasets arc 
+ --datasets arc
 ```
 
 #### Install from source
@@ -358,13 +359,13 @@ EvalScope supports using third-party evaluation frameworks to initiate evaluation
 EvalScope supports custom dataset evaluation. For detailed information, please refer to the Custom Dataset Evaluation [📖User Guide](https://evalscope.readthedocs.io/en/latest/advanced_guides/custom_dataset.html)
 
 ## Offline Evaluation
-You can use local dataset to evaluate the model without internet connection. 
+You can use local dataset to evaluate the model without internet connection.
 
 Refer to: Offline Evaluation [📖 User Guide](https://evalscope.readthedocs.io/en/latest/user_guides/offline_evaluation.html)
 
 
 ## Arena Mode
-The Arena mode allows multiple candidate models to be evaluated through pairwise battles, and can choose to use the AI Enhanced Auto-Reviewer (AAR) automatic evaluation process or manual evaluation to obtain the evaluation report. 
+The Arena mode allows multiple candidate models to be evaluated through pairwise battles, and can choose to use the AI Enhanced Auto-Reviewer (AAR) automatic evaluation process or manual evaluation to obtain the evaluation report.
 
 Refer to: Arena Mode [📖 User Guide](https://evalscope.readthedocs.io/en/latest/user_guides/arena.html)
 
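Beyond the version bump, the substantive METADATA change is pinning previously open-ended dependencies: `pyarrow<=17.0.0`, `ms-opencompass>=0.1.3`, `mteb==1.19.4`, and `ragas==0.2.5` (the last two in both the `all` and `rag` extras). A quick stdlib check of what a local environment actually resolved for these pins (a sketch, not part of the package):

```python
from importlib.metadata import PackageNotFoundError, version

for pkg in ('evalscope', 'pyarrow', 'ms-opencompass', 'mteb', 'ragas'):
    try:
        print(f'{pkg}=={version(pkg)}')
    except PackageNotFoundError:
        print(f'{pkg}: not installed')
```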
{evalscope-0.6.0.dist-info → evalscope-0.6.1.dist-info}/RECORD CHANGED
@@ -6,7 +6,7 @@ evalscope/run.py,sha256=uAXtaxIBcR94jyfHGFAecuzn0y71oLgu-d9VOohCJAw,18738
 evalscope/run_arena.py,sha256=BCWCAiX0BQ9pLMIq08svEcd-IoFr75gFShpV88robIY,8963
 evalscope/run_ms.py,sha256=UtJoGnah64SXigTawJQWTi_TEGjr7Td0rjCTaO-htL8,6028
 evalscope/summarizer.py,sha256=rIyML8HpjQxIpXg8KvQ0CzOS6xMS-JHZh6kUZzkaRsk,6640
-evalscope/version.py,sha256=
+evalscope/version.py,sha256=o4SLhBjhMLzVbUK1flGxf-kiqIBLnLnJbxG06BmvkyU,118
 evalscope/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 evalscope/backend/base.py,sha256=5BLrDNNwxsGp35zorD-kphmN15tlBbkuuqwkz8jWZq0,876
 evalscope/backend/opencompass/__init__.py,sha256=UP_TW5KBq6V_Nvqkeb7PGvGGX3rVYussT43npwCwDgE,135
@@ -14,7 +14,7 @@ evalscope/backend/opencompass/api_meta_template.py,sha256=sBW0XbVDOKeJ7mVUDLhmcG
 evalscope/backend/opencompass/backend_manager.py,sha256=_eg82FLAVxQ6t5e1OqlyuxZcngqD8rxvI5EijLUh_zI,10294
 evalscope/backend/opencompass/tasks/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
 evalscope/backend/opencompass/tasks/eval_api.py,sha256=12lrgDpMzZ1XBRboq5TEOovDPCMDwwGCJoRT78Ox_yo,1108
-evalscope/backend/opencompass/tasks/eval_datasets.py,sha256=
+evalscope/backend/opencompass/tasks/eval_datasets.py,sha256=3V67A2LSj_XaiGd9fqdKpxpzyNrfynCH3UnhaBtAaqc,5326
 evalscope/backend/rag_eval/__init__.py,sha256=8om6TVnTMmyTEQt1jBuUQA4UfIzyps-_-ih90H_Qjio,284
 evalscope/backend/rag_eval/backend_manager.py,sha256=jmO-UMu6_iOXMnl4--PrMWCsnIYEhsbiX017rtURqm0,2997
 evalscope/backend/rag_eval/clip_benchmark/__init__.py,sha256=gDXCiRUTSeGQHxd5SjQsnphMqHJ2si2jywRiHvujEOg,150
@@ -30,10 +30,10 @@ evalscope/backend/rag_eval/cmteb/arguments.py,sha256=wZvnVir2tSxYCV_DPR3TSDj4Vxt
 evalscope/backend/rag_eval/cmteb/base.py,sha256=fYrIjKwOLwBAHb2rlNkEjYScjZ5Qpyv2LdMmWZYWREA,2830
 evalscope/backend/rag_eval/cmteb/task_template.py,sha256=Clyc8TZCtZrL6MjAw49rh55Xb3hf2y1C3SzLvZsorLE,2646
 evalscope/backend/rag_eval/cmteb/tasks/Classification.py,sha256=7adR40W6Uu58-QR9jCUP4k7TdAnG0oT225v4xHXah2g,10635
-evalscope/backend/rag_eval/cmteb/tasks/Clustering.py,sha256
+evalscope/backend/rag_eval/cmteb/tasks/Clustering.py,sha256=-oJ9rXy7pgOB7Gyf68TcSlmmAUoBx5hKofcKNuIsCd8,8977
 evalscope/backend/rag_eval/cmteb/tasks/CustomTask.py,sha256=rF6dtrwOfvJoq2Y4myZg9_638M1g06qq0hWCmvxsIo0,2039
 evalscope/backend/rag_eval/cmteb/tasks/PairClassification.py,sha256=2WkaTE-jF8jqsu1UcNDqN8A4567UzW5boD_0B83j-9A,4008
-evalscope/backend/rag_eval/cmteb/tasks/Reranking.py,sha256=
+evalscope/backend/rag_eval/cmteb/tasks/Reranking.py,sha256=C34nDuya8OT3aeMxYCYjUpUtWp7w00jSfIYQSInlNAg,5329
 evalscope/backend/rag_eval/cmteb/tasks/Retrieval.py,sha256=wUxiQH5aOmWNS4YswACyHqBn5xqP5eyvsq6U9WSp5R0,11457
 evalscope/backend/rag_eval/cmteb/tasks/STS.py,sha256=6GMaoCANM-IKYLk4srHOYr_eurav3DGihHMQeJPXR6k,12054
 evalscope/backend/rag_eval/cmteb/tasks/__init__.py,sha256=eBHm_TWeh7WiwpdVBtUlegeXMAxJyVQdUHRhJERobIs,1506
@@ -44,8 +44,13 @@ evalscope/backend/rag_eval/ragas/metrics/__init__.py,sha256=HgY5nrcNtWpQ7gBi5lCE
 evalscope/backend/rag_eval/ragas/metrics/multi_modal_faithfulness.py,sha256=Uqz5qWZ76Gos95_QlhwncbATXyk0YX4wkI0LiAdPElU,3838
 evalscope/backend/rag_eval/ragas/metrics/multi_modal_relevance.py,sha256=CdLnWHq1eTna6j3F5-pncW5YusxD_v3ScjzeCsZ7mng,3967
 evalscope/backend/rag_eval/ragas/tasks/__init__.py,sha256=WO2xja0g0JSiYGdu2uAEDQgDceuFcgPWwPoqFnwDU0s,172
-evalscope/backend/rag_eval/ragas/tasks/testset_generation.py,sha256=
+evalscope/backend/rag_eval/ragas/tasks/testset_generation.py,sha256=nX-dG0Fm1629pSASujuEmMODFZf1955WncNNykRrNtI,9305
 evalscope/backend/rag_eval/ragas/tasks/translate_prompt.py,sha256=bXOqik6qKWzbrEz21ykdkqeqqPrmoUIhTwW6eRQXy0M,2222
+evalscope/backend/rag_eval/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+evalscope/backend/rag_eval/utils/clip.py,sha256=frafvJ1soUtjFUmi-053_Fhg6ERRwyvczQBlLWAX9vE,5104
+evalscope/backend/rag_eval/utils/embedding.py,sha256=RZf0JlovZY_cCBsq8MMUqC_Sy78WtKLY_rBAlRA_udo,6239
+evalscope/backend/rag_eval/utils/llm.py,sha256=9tFwMNoTf3jNomgDu5qqVLO92HtEtelH3DXpny9_B2g,2552
+evalscope/backend/rag_eval/utils/tools.py,sha256=LpcYoeIBj1btzQ1_P84u1dYCdRWhMtiltxihmZCvWKk,1528
 evalscope/backend/vlm_eval_kit/__init__.py,sha256=xTgHM95lWzh4s0W7zxLwYkgUbPAZfAb0UoGGmyyBXrs,83
 evalscope/backend/vlm_eval_kit/backend_manager.py,sha256=ewhpE9yzsqf5ED6kqsqek2YEgg96GBQOupxtVNhaXxI,6046
 evalscope/backend/vlm_eval_kit/custom_dataset.py,sha256=Yz2A5kB1E8DYBnjuVCA6TTPtLjhg8vYKeJTh6FU_Ecw,1645
@@ -132,7 +137,7 @@ evalscope/metrics/math_accuracy.py,sha256=1PCy1VUNYg48JcGy-6SUmUDZNwPeAkMW1QQ_lX
 evalscope/metrics/metrics.py,sha256=sDZljGiZwgHsFZ5eNi65-3z3BLCdIwWUzPcq2QpKf1k,12545
 evalscope/metrics/rouge_metric.py,sha256=sN0r-sXXc-nJUdFrthQPAv1VFdOCrF6zzIYDKaLSgrU,4522
 evalscope/metrics/bundled_rouge_score/__init__.py,sha256=PwbTdk8168FwDJe_l8XIqDuBgZQooDsP31vj7di05Fs,650
-evalscope/metrics/bundled_rouge_score/rouge_scorer.py,sha256=
+evalscope/metrics/bundled_rouge_score/rouge_scorer.py,sha256=MXcHwmsXnh9mQZR1Bt5St6DNwXY-mfz4dNM8y6a23dc,12236
 evalscope/models/__init__.py,sha256=zG27J2HSeKPGiAIUE7QLPHEPLyXLsfaDwYI_TDXjpCg,145
 evalscope/models/dummy_chat_model.py,sha256=xE8wcFVSCkvizEJ-B8ojX0Ir01Q5KrN5mapjMQaQtbg,1325
 evalscope/models/model.py,sha256=ZzzVzZHVzuzdt5F1r-rEBT44ZfW9B7R1spsrV-T8nSw,3020
@@ -204,8 +209,8 @@ evalscope/utils/logger.py,sha256=cf3U400Mx1speMMNXorjwEE8noDz5Mbd-9PNgaulGeY,301
 evalscope/utils/task_cfg_parser.py,sha256=LiNQ2X8lbZU0cODpaY_PbKyUhNoxZIC495UsLJigX64,138
 evalscope/utils/task_utils.py,sha256=IMtBSBUp3H95Ko0vn8Q55Wmz2SFZXSfjVy49tyomL_g,537
 evalscope/utils/utils.py,sha256=zHo9hfxGBUVKE2xNMR7lDoEvfRnk4V4946DEfXQhlq4,20509
-evalscope-0.6.
-evalscope-0.6.
-evalscope-0.6.
-evalscope-0.6.
-evalscope-0.6.
+evalscope-0.6.1.dist-info/METADATA,sha256=n4CpTzJGnhgqEsfbL1UfZtXHULmeNCGnKChyi6eT8Fw,21237
+evalscope-0.6.1.dist-info/WHEEL,sha256=bFJAMchF8aTQGUgMZzHJyDDMPTO3ToJ7x23SLJa1SVo,92
+evalscope-0.6.1.dist-info/entry_points.txt,sha256=Qr4oTgGhg_K-iUtKwVH6lWUhFHDUiH9trIqydHGTEug,56
+evalscope-0.6.1.dist-info/top_level.txt,sha256=jNR-HMn3TR8Atolq7_4rW8IWVX6GhvYV5_1Y_KbJKlY,10
+evalscope-0.6.1.dist-info/RECORD,,
{evalscope-0.6.0.dist-info → evalscope-0.6.1.dist-info}/entry_points.txt: file without changes
{evalscope-0.6.0.dist-info → evalscope-0.6.1.dist-info}/top_level.txt: file without changes
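Each RECORD row has the form `path,sha256=<digest>,<size>`, where the digest is an unpadded urlsafe-base64 SHA-256 of the file contents, per the wheel spec. A sketch for recomputing one locally (the path is illustrative; point it at any installed file you want to verify):

```python
import base64
import hashlib
from pathlib import Path

def record_digest(path: str) -> str:
    """Return the unpadded urlsafe-base64 sha256 digest used in wheel RECORD files."""
    digest = hashlib.sha256(Path(path).read_bytes()).digest()
    return base64.urlsafe_b64encode(digest).rstrip(b'=').decode('ascii')

# Example: compare against the RECORD entry for evalscope/version.py (size 118).
print(record_digest('evalscope/version.py'))
```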