evalscope 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of evalscope might be problematic.

@@ -76,9 +76,7 @@ class OpenCompassBackendManager(BackendManager):
     @staticmethod
     def _check_env():
         if is_module_installed('opencompass'):
-            logger.info('Please make sure you have installed the `ms-opencompass`: `pip install ms-opencompass`')
-        else:
-            raise ModuleNotFoundError('Please install the `ms-opencompass` first: `pip install ms-opencompass`')
+            logger.info('Check the OpenCompass environment: OK')
 
     @staticmethod
     def get_restore_arg(arg_name: str, arg_val: bool):
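Both `_check_env` implementations branch on `is_module_installed`, a helper whose body is not part of this diff. For orientation only, a check with this contract is typically a thin wrapper around importlib; the sketch below is an assumption, not the code shipped in the wheel:

```python
import importlib.util

def is_module_installed(module_name: str) -> bool:
    # True if the module can be resolved on the current Python path,
    # e.g. 'opencompass' after `pip install ms-opencompass`.
    return importlib.util.find_spec(module_name) is not None
```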
@@ -6,6 +6,7 @@ from opencompass.tasks import OpenICLInferTask
 
 
 with read_base():
+    from opencompass.configs.summarizers.medium import summarizer
     from evalscope.backend.opencompass.tasks.eval_datasets import datasets
 
 # 1. Get datasets
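For context on the hunk above: in OpenCompass-style config files, symbols imported inside the `read_base()` block are merged into the task configuration, so the new line makes a default `summarizer` (result-aggregation settings) available alongside `datasets`. A minimal illustrative fragment follows; the `read_base` import path is an assumption about the surrounding file, which this hunk does not show:

```python
from mmengine.config import read_base  # assumed import; OpenCompass configs commonly take read_base from mmengine

with read_base():
    # Anything imported here ('summarizer', 'datasets', ...) becomes part of this task's config.
    from opencompass.configs.summarizers.medium import summarizer
    from evalscope.backend.opencompass.tasks.eval_datasets import datasets
```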
@@ -31,7 +31,7 @@ class VLMEvalKitBackendManager(BackendManager):
         from vlmeval.utils.arguments import Arguments as VLMEvalArguments
         self.args = VLMEvalArguments(**self.config_d)
 
-        self.valid_models = self.list_supported_VLMs()
+        self.valid_models = self.list_supported_models()
         self.valid_model_names = list(self.valid_models.keys())
         self.valid_datasets = self.list_supported_datasets()
 
@@ -86,7 +86,7 @@ class VLMEvalKitBackendManager(BackendManager):
         return self.get_cmd()
 
     @staticmethod
-    def list_supported_VLMs():
+    def list_supported_models():
        from vlmeval.config import supported_VLM
        return supported_VLM
 
@@ -98,9 +98,7 @@ class VLMEvalKitBackendManager(BackendManager):
     @staticmethod
     def _check_env():
         if is_module_installed('vlmeval'):
-            logger.info('Please make sure you have installed the `ms-vlmeval`: `pip install ms-vlmeval`')
-        else:
-            raise ModuleNotFoundError('Please install the `ms-vlmeval` first: `pip install ms-vlmeval`')
+            logger.info('Check VLM Evaluation Kit: Installed')
 
     @staticmethod
     def get_restore_arg(arg_name: str, arg_val: bool):
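Of the two VLMEvalKit changes above, the method rename is the one downstream code will notice. A caller written against 0.5.0 would need a one-line update along these lines (a hedged sketch, assuming `ms-vlmeval` is installed):

```python
from evalscope.backend.vlm_eval_kit import VLMEvalKitBackendManager

# 0.5.0: VLMEvalKitBackendManager.list_supported_VLMs()
# 0.5.2: the same `supported_VLM` mapping, under the new name
models = VLMEvalKitBackendManager.list_supported_models()
print(sorted(models.keys())[:5])  # preview a few supported VLM names
```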
@@ -3,7 +3,7 @@ from enum import Enum
 
 
 class EvalBackend(Enum):
-    # Use native evaluation pipeline of Eval-Scope
+    # Use native evaluation pipeline of EvalScope
     NATIVE = 'Native'
 
     # Use OpenCompass framework as the evaluation backend
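The enum's string values are what a task configuration selects on. A hedged illustration follows; the module path is inferred from the RECORD section below (`evalscope/utils/task_utils.py` shrinks by exactly the one byte this comment fix accounts for), and the `eval_backend` key is assumed rather than shown in this diff:

```python
from evalscope.utils.task_utils import EvalBackend  # module path inferred, not confirmed by this diff

task_cfg = {
    'eval_backend': EvalBackend.NATIVE.value,  # 'Native'; key name assumed for illustration
    # ... remaining task settings
}
```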
evalscope/version.py CHANGED
@@ -1,4 +1,4 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 
-__version__ = '0.5.0'
-__release_datetime__ = '2024-08-01 08:00:00'
+__version__ = '0.5.2'
+__release_datetime__ = '2024-08-06 08:00:00'
@@ -1,13 +1,11 @@
 Metadata-Version: 2.1
 Name: evalscope
-Version: 0.5.0
-Summary: Eval-Scope: Lightweight LLMs Evaluation Framework
-Home-page: https://github.com/modelscope/eval-scope
+Version: 0.5.2
+Summary: EvalScope: Lightweight LLMs Evaluation Framework
+Home-page: https://github.com/modelscope/evalscope
 Author: ModelScope team
 Author-email: contact@modelscope.cn
-License: UNKNOWN
 Keywords: python,llm,evaluation
-Platform: UNKNOWN
 Classifier: Development Status :: 4 - Beta
 Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Operating System :: OS Independent
@@ -80,8 +78,8 @@ Requires-Dist: transformers (<4.43,>=4.33) ; extra == 'all'
 Requires-Dist: transformers-stream-generator ; extra == 'all'
 Requires-Dist: jieba ; extra == 'all'
 Requires-Dist: rouge-chinese ; extra == 'all'
-Requires-Dist: ms-opencompass ; extra == 'all'
-Requires-Dist: ms-vlmeval ; extra == 'all'
+Requires-Dist: ms-opencompass (>=0.0.5) ; extra == 'all'
+Requires-Dist: ms-vlmeval (>=0.0.5) ; extra == 'all'
 Provides-Extra: inner
 Requires-Dist: absl-py ; extra == 'inner'
 Requires-Dist: accelerate ; extra == 'inner'
@@ -109,16 +107,16 @@ Requires-Dist: tqdm ; extra == 'inner'
 Requires-Dist: transformers (<4.43,>=4.33) ; extra == 'inner'
 Requires-Dist: transformers-stream-generator ; extra == 'inner'
 Provides-Extra: opencompass
-Requires-Dist: ms-opencompass ; extra == 'opencompass'
+Requires-Dist: ms-opencompass (>=0.0.5) ; extra == 'opencompass'
 Provides-Extra: vlmeval
-Requires-Dist: ms-vlmeval ; extra == 'vlmeval'
+Requires-Dist: ms-vlmeval (>=0.0.5) ; extra == 'vlmeval'
 
 English | [简体中文](README_zh.md)
 
 <p align="center">
 <a href="https://pypi.org/project/evalscope"><img alt="PyPI - Downloads" src="https://img.shields.io/pypi/dm/evalscope">
 </a>
-<a href="https://github.com/modelscope/eval-scope/pulls"><img src="https://img.shields.io/badge/PR-welcome-55EB99.svg"></a>
+<a href="https://github.com/modelscope/evalscope/pulls"><img src="https://img.shields.io/badge/PR-welcome-55EB99.svg"></a>
 <p>
 
 ## 📖 Table of Content
@@ -133,7 +131,7 @@ English | [简体中文](README_zh.md)
 
 ## 📝 Introduction
 
-Large Language Model (LLMs) evaluation has become a critical process for assessing and improving LLMs. To better support the evaluation of large models, we propose the Eval-Scope framework, which includes the following components and features:
+Large Language Model (LLMs) evaluation has become a critical process for assessing and improving LLMs. To better support the evaluation of large models, we propose the EvalScope framework, which includes the following components and features:
 
 - Pre-configured common benchmark datasets, including: MMLU, CMMLU, C-Eval, GSM8K, ARC, HellaSwag, TruthfulQA, MATH, HumanEval, etc.
 - Implementation of common evaluation metrics
@@ -146,7 +144,7 @@ Large Language Model (LLMs) evaluation has become a critical process for assessi
 - Visualization tools
 - Model Inference Performance Evaluation [Tutorial](evalscope/perf/README.md)
 - Support for OpenCompass as an Evaluation Backend, featuring advanced encapsulation and task simplification to easily submit tasks to OpenCompass for evaluation.
-- Supports VLMEvalKit as the evaluation backend. It initiates VLMEvalKit's multimodal evaluation tasks through Eval-Scope, supporting various multimodal models and datasets.
+- Supports VLMEvalKit as the evaluation backend. It initiates VLMEvalKit's multimodal evaluation tasks through EvalScope, supporting various multimodal models and datasets.
 - Full pipeline support: Seamlessly integrate with SWIFT to easily train and deploy model services, initiate evaluation tasks, view evaluation reports, and achieve an end-to-end large model development process.
 
 
@@ -167,33 +165,48 @@ Large Language Model (LLMs) evaluation has become a critical process for assessi
 - **[2024.07.31]** Breaking change: The sdk name has been changed from `llmuses` to `evalscope`, please update the sdk name in your code.
 - **[2024.07.26]** Supports **VLMEvalKit** as a third-party evaluation framework, initiating multimodal model evaluation tasks. [User Guide](#vlmevalkit-evaluation-backend) 🔥🔥🔥
 - **[2024.06.29]** Supports **OpenCompass** as a third-party evaluation framework. We have provided a high-level wrapper, supporting installation via pip and simplifying the evaluation task configuration. [User Guide](#opencompass-evaluation-backend) 🔥🔥🔥
-- **[2024.06.13]** Eval-Scope has been updated to version 0.3.x, which supports the ModelScope SWIFT framework for LLMs evaluation. 🚀🚀🚀
+- **[2024.06.13]** EvalScope has been updated to version 0.3.x, which supports the ModelScope SWIFT framework for LLMs evaluation. 🚀🚀🚀
 - **[2024.06.13]** We have supported the ToolBench as a third-party evaluation backend for Agents evaluation. 🚀🚀🚀
 
 
 
 ## 🛠️ Installation
 ### Install with pip
-1. create conda environment
+1. create conda environment [Optional]
 ```shell
-conda create -n eval-scope python=3.10
-conda activate eval-scope
+conda create -n evalscope python=3.10
+conda activate evalscope
 ```
 
-2. Install Eval-Scope
+2. Install EvalScope
 ```shell
-pip install evalscope
+pip install evalscope # Installation with Native backend (by default)
+
+pip install evalscope[opencompass] # Installation with OpenCompass backend
+pip install evalscope[vlmeval] # Installation with VLMEvalKit backend
+pip install evalscope[all] # Installation with all backends (Native, OpenCompass, VLMEvalKit)
 ```
 
+DEPRECATION WARNING: For 0.4.3 or older versions, please use the following command to install:
+```shell
+pip install llmuses<=0.4.3
+
+# Usage:
+from llmuses.run import run_task
+...
+
+```
+
+
 ### Install from source code
 1. Download source code
 ```shell
-git clone https://github.com/modelscope/eval-scope.git
+git clone https://github.com/modelscope/evalscope.git
 ```
 
 2. Install dependencies
 ```shell
-cd eval-scope/
+cd evalscope/
 pip install -e .
 ```
 
@@ -237,15 +250,15 @@ print(TemplateType.get_template_name_list())
 ```
 
 ### Evaluation Backend
-Eval-Scope supports using third-party evaluation frameworks to initiate evaluation tasks, which we call Evaluation Backend. Currently supported Evaluation Backend includes:
-- **Native**: Eval-Scope's own **default evaluation framework**, supporting various evaluation modes including single model evaluation, arena mode, and baseline model comparison mode.
-- [OpenCompass](https://github.com/open-compass/opencompass): Initiate OpenCompass evaluation tasks through Eval-Scope. Lightweight, easy to customize, supports seamless integration with the LLM fine-tuning framework [ModelScope Swift](https://github.com/modelscope/swift).
-- [VLMEvalKit](https://github.com/open-compass/VLMEvalKit): Initiate VLMEvalKit multimodal evaluation tasks through Eval-Scope. Supports various multimodal models and datasets, and offers seamless integration with the LLM fine-tuning framework [ModelScope Swift](https://github.com/modelscope/swift).
-- **ThirdParty**: The third-party task, e.g. [ToolBench](evalscope/thirdparty/toolbench/README.md), you can contribute your own evaluation task to Eval-Scope as third-party backend.
+EvalScope supports using third-party evaluation frameworks to initiate evaluation tasks, which we call Evaluation Backend. Currently supported Evaluation Backend includes:
+- **Native**: EvalScope's own **default evaluation framework**, supporting various evaluation modes including single model evaluation, arena mode, and baseline model comparison mode.
+- [OpenCompass](https://github.com/open-compass/opencompass): Initiate OpenCompass evaluation tasks through EvalScope. Lightweight, easy to customize, supports seamless integration with the LLM fine-tuning framework [ModelScope Swift](https://github.com/modelscope/swift).
+- [VLMEvalKit](https://github.com/open-compass/VLMEvalKit): Initiate VLMEvalKit multimodal evaluation tasks through EvalScope. Supports various multimodal models and datasets, and offers seamless integration with the LLM fine-tuning framework [ModelScope Swift](https://github.com/modelscope/swift).
+- **ThirdParty**: The third-party task, e.g. [ToolBench](evalscope/thirdparty/toolbench/README.md), you can contribute your own evaluation task to EvalScope as third-party backend.
 
 #### OpenCompass Eval-Backend
 
-To facilitate the use of the OpenCompass evaluation backend, we have customized the OpenCompass source code and named it `ms-opencompass`. This version includes optimizations for evaluation task configuration and execution based on the original version, and it supports installation via PyPI. This allows users to initiate lightweight OpenCompass evaluation tasks through Eval-Scope. Additionally, we have initially opened up API-based evaluation tasks in the OpenAI API format. You can deploy model services using [ModelScope Swift](https://github.com/modelscope/swift), where [swift deploy](https://swift.readthedocs.io/en/latest/LLM/VLLM-inference-acceleration-and-deployment.html) supports using vLLM to launch model inference services.
+To facilitate the use of the OpenCompass evaluation backend, we have customized the OpenCompass source code and named it `ms-opencompass`. This version includes optimizations for evaluation task configuration and execution based on the original version, and it supports installation via PyPI. This allows users to initiate lightweight OpenCompass evaluation tasks through EvalScope. Additionally, we have initially opened up API-based evaluation tasks in the OpenAI API format. You can deploy model services using [ModelScope Swift](https://github.com/modelscope/swift), where [swift deploy](https://swift.readthedocs.io/en/latest/LLM/VLLM-inference-acceleration-and-deployment.html) supports using vLLM to launch model inference services.
 
 
 ##### Installation
@@ -301,7 +314,7 @@ python examples/example_eval_swift_openai_api.py
 
 #### VLMEvalKit Evaluation Backend
 
-To facilitate the use of the VLMEvalKit evaluation backend, we have customized the VLMEvalKit source code and named it `ms-vlmeval`. This version encapsulates the configuration and execution of evaluation tasks based on the original version and supports installation via PyPI, allowing users to initiate lightweight VLMEvalKit evaluation tasks through Eval-Scope. Additionally, we support API-based evaluation tasks in the OpenAI API format. You can deploy multimodal model services using ModelScope [swift](https://github.com/modelscope/swift).
+To facilitate the use of the VLMEvalKit evaluation backend, we have customized the VLMEvalKit source code and named it `ms-vlmeval`. This version encapsulates the configuration and execution of evaluation tasks based on the original version and supports installation via PyPI, allowing users to initiate lightweight VLMEvalKit evaluation tasks through EvalScope. Additionally, we support API-based evaluation tasks in the OpenAI API format. You can deploy multimodal model services using ModelScope [swift](https://github.com/modelscope/swift).
 
 ##### Installation
 ```shell
@@ -319,7 +332,8 @@ For detailed information about the datasets, please refer to [VLMEvalKit Support
 You can use the following to view the list of dataset names:
 ```python
 from evalscope.backend.vlm_eval_kit import VLMEvalKitBackendManager
-print(f'** All models from VLMEvalKit backend: {VLMEvalKitBackendManager.list(list_supported_VLMs().keys())}')
+print(f'** All models from VLMEvalKit backend: {VLMEvalKitBackendManager.list_supported_models().keys()}')
+
 ```
 If the dataset file does not exist locally when loading the dataset, it will be automatically downloaded to the `~/LMUData/` directory.
 
@@ -562,5 +576,3 @@ The LLM Leaderboard aims to provide an objective and comprehensive evaluation st
 - [ ] Auto-reviewer
 - [ ] Qwen-max
 
-
-
@@ -6,17 +6,17 @@ evalscope/run.py,sha256=T-2zoJpBx6YxLnLJH-iFF3UxUGYTU36PMV_DQ9e8tSM,18484
 evalscope/run_arena.py,sha256=_LL8fqeKUEMUg985TENYzcnH5_Q8sqPxM68eZk-jhLA,8793
 evalscope/run_ms.py,sha256=UtJoGnah64SXigTawJQWTi_TEGjr7Td0rjCTaO-htL8,6028
 evalscope/summarizer.py,sha256=Ie1kwPETpz3x2yROLMGqC0UwEj6OKJuKwEcUqxUx5fM,6358
-evalscope/version.py,sha256=_1Lu_R_3DYpaloS52_vLqUEBtD1ixppLykqr1dl_TqM,118
+evalscope/version.py,sha256=Bo14bi3CEm4GSQOqlmyUKrRQLg4TS8hCNrE-bnYDI28,118
 evalscope/backend/__init__.py,sha256=UP_TW5KBq6V_Nvqkeb7PGvGGX3rVYussT43npwCwDgE,135
 evalscope/backend/base.py,sha256=5BLrDNNwxsGp35zorD-kphmN15tlBbkuuqwkz8jWZq0,876
 evalscope/backend/opencompass/__init__.py,sha256=UP_TW5KBq6V_Nvqkeb7PGvGGX3rVYussT43npwCwDgE,135
 evalscope/backend/opencompass/api_meta_template.py,sha256=sBW0XbVDOKeJ7mVUDLhmcG4e0yClw3eluazdp_8wtgQ,1753
-evalscope/backend/opencompass/backend_manager.py,sha256=swmJELcEDNorZzyXZxOhz2q5tWAE-IkotqJVZ2rBRQ4,10366
+evalscope/backend/opencompass/backend_manager.py,sha256=Rr8eFFDUXTxI8AMcrbFW9LZuSQVZ7tsgHcZ1veNhfWM,10190
 evalscope/backend/opencompass/tasks/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
-evalscope/backend/opencompass/tasks/eval_api.py,sha256=9ylEm1Xk_xft56EEpVvlzK89_R1kQh7PI6uVZiexqy8,1042
+evalscope/backend/opencompass/tasks/eval_api.py,sha256=12lrgDpMzZ1XBRboq5TEOovDPCMDwwGCJoRT78Ox_yo,1108
 evalscope/backend/opencompass/tasks/eval_datasets.py,sha256=DWwKcQGGSkkh65H1d-oKN8Jow0Q0cHJJzDC75inycFM,5186
 evalscope/backend/vlm_eval_kit/__init__.py,sha256=xTgHM95lWzh4s0W7zxLwYkgUbPAZfAb0UoGGmyyBXrs,83
-evalscope/backend/vlm_eval_kit/backend_manager.py,sha256=_5yZ7dUULvzLw9-LYg5Svmeia8M6-8CInmiwtGfkYF4,6213
+evalscope/backend/vlm_eval_kit/backend_manager.py,sha256=PQ9n2jdfPj7s5Ma6_5nNuOMM4La9JBdxKbLf4Oa17NI,6055
 evalscope/benchmarks/__init__.py,sha256=6TKP35wfKf7R_h870fsEtcIlIAgomKOcukNL9M-5I1Y,162
 evalscope/benchmarks/benchmark.py,sha256=e7rA8Y_vo6q5BhlUbZGWfZ1-SfJnU2IFRg62pnjQtDk,2157
 evalscope/benchmarks/data_adapter.py,sha256=eVQvOQYQOQbIl8UlvOEUqRThL3FP3aUD6DSlqF1bqO0,10395
@@ -156,10 +156,10 @@ evalscope/utils/arena_utils.py,sha256=RMkymUv9Cxs37arUntzgDY5P0Dand2jGpsb7uy6wZm
 evalscope/utils/completion_parsers.py,sha256=61l8CTh1VxHgRoMDhtznpAhuJp47MssGgS-LdEe_h80,2997
 evalscope/utils/logger.py,sha256=Ycd0W17Z_oiByPuPX3_umNrOCHjT9O_e_Kws7ZWUSvU,1855
 evalscope/utils/task_cfg_parser.py,sha256=LiNQ2X8lbZU0cODpaY_PbKyUhNoxZIC495UsLJigX64,138
-evalscope/utils/task_utils.py,sha256=9izZ6H7nso1OJmdoduDpaFN2KA3DmZ91dkKXA8GTIUc,460
+evalscope/utils/task_utils.py,sha256=Mv_u_f4Z91zcUeko6acZCmnOAPRfk61kf_dliLzG5Yk,459
 evalscope/utils/utils.py,sha256=zHo9hfxGBUVKE2xNMR7lDoEvfRnk4V4946DEfXQhlq4,20509
-evalscope-0.5.0.dist-info/METADATA,sha256=xMJR-aNm2_br22AlhRWVeu24slq9oW5tXfxOvilPZoM,27367
-evalscope-0.5.0.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
-evalscope-0.5.0.dist-info/entry_points.txt,sha256=eAQqqT7PlGix33BDKmS7wsaIJ_6-vvGrq79Szb6uVxg,57
-evalscope-0.5.0.dist-info/top_level.txt,sha256=jNR-HMn3TR8Atolq7_4rW8IWVX6GhvYV5_1Y_KbJKlY,10
-evalscope-0.5.0.dist-info/RECORD,,
+evalscope-0.5.2.dist-info/METADATA,sha256=F0YWg7gyenErvz-Kq1X5Z2Ngr1TYh3H-KpCX5zBLnog,27866
+evalscope-0.5.2.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+evalscope-0.5.2.dist-info/entry_points.txt,sha256=Qr4oTgGhg_K-iUtKwVH6lWUhFHDUiH9trIqydHGTEug,56
+evalscope-0.5.2.dist-info/top_level.txt,sha256=jNR-HMn3TR8Atolq7_4rW8IWVX6GhvYV5_1Y_KbJKlY,10
+evalscope-0.5.2.dist-info/RECORD,,
@@ -1,3 +1,2 @@
 [console_scripts]
 evalscope = evalscope.cli.cli:run_cmd
-