evalscope 0.9.0__py3-none-any.whl → 0.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of evalscope has been flagged as potentially problematic.

Files changed (69)
  1. evalscope/arguments.py +1 -0
  2. evalscope/benchmarks/arc/arc_adapter.py +3 -5
  3. evalscope/benchmarks/bbh/bbh_adapter.py +3 -3
  4. evalscope/benchmarks/benchmark.py +1 -1
  5. evalscope/benchmarks/ceval/ceval_adapter.py +5 -82
  6. evalscope/benchmarks/cmmlu/cmmlu_adapter.py +5 -79
  7. evalscope/benchmarks/competition_math/competition_math_adapter.py +4 -4
  8. evalscope/benchmarks/data_adapter.py +69 -70
  9. evalscope/benchmarks/general_qa/general_qa_adapter.py +10 -63
  10. evalscope/benchmarks/gpqa/__init__.py +0 -0
  11. evalscope/benchmarks/gpqa/chain_of_thought.txt +81 -0
  12. evalscope/benchmarks/gpqa/gpqa_adapter.py +103 -0
  13. evalscope/benchmarks/gsm8k/gsm8k_adapter.py +4 -5
  14. evalscope/benchmarks/hellaswag/hellaswag_adapter.py +12 -6
  15. evalscope/benchmarks/humaneval/humaneval_adapter.py +3 -4
  16. evalscope/benchmarks/ifeval/__init__.py +0 -0
  17. evalscope/benchmarks/ifeval/ifeval_adapter.py +56 -0
  18. evalscope/benchmarks/ifeval/instructions.py +1477 -0
  19. evalscope/benchmarks/ifeval/instructions_registry.py +188 -0
  20. evalscope/benchmarks/ifeval/instructions_util.py +1670 -0
  21. evalscope/benchmarks/ifeval/utils.py +134 -0
  22. evalscope/benchmarks/iquiz/__init__.py +0 -0
  23. evalscope/benchmarks/iquiz/iquiz_adapter.py +63 -0
  24. evalscope/benchmarks/mmlu/mmlu_adapter.py +8 -84
  25. evalscope/benchmarks/mmlu_pro/mmlu_pro_adapter.py +2 -2
  26. evalscope/benchmarks/race/race_adapter.py +4 -73
  27. evalscope/benchmarks/trivia_qa/trivia_qa_adapter.py +3 -6
  28. evalscope/benchmarks/truthful_qa/truthful_qa_adapter.py +8 -57
  29. evalscope/cli/cli.py +2 -0
  30. evalscope/cli/start_app.py +30 -0
  31. evalscope/collections/evaluator.py +82 -62
  32. evalscope/collections/sampler.py +47 -41
  33. evalscope/collections/schema.py +14 -10
  34. evalscope/constants.py +4 -0
  35. evalscope/evaluator/evaluator.py +22 -13
  36. evalscope/metrics/__init__.py +2 -5
  37. evalscope/metrics/metrics.py +11 -2
  38. evalscope/metrics/named_metrics.py +17 -0
  39. evalscope/models/chat_adapter.py +2 -0
  40. evalscope/models/server_adapter.py +11 -4
  41. evalscope/perf/__init__.py +1 -0
  42. evalscope/perf/main.py +0 -1
  43. evalscope/perf/plugin/api/custom_api.py +1 -1
  44. evalscope/perf/plugin/api/openai_api.py +1 -1
  45. evalscope/perf/plugin/datasets/flickr8k.py +1 -1
  46. evalscope/perf/plugin/datasets/longalpaca.py +1 -1
  47. evalscope/report/__init__.py +5 -0
  48. evalscope/report/app.py +693 -0
  49. evalscope/report/combinator.py +73 -0
  50. evalscope/report/generator.py +80 -0
  51. evalscope/report/utils.py +133 -0
  52. evalscope/run.py +16 -11
  53. evalscope/summarizer.py +1 -1
  54. evalscope/utils/chat_service.py +1 -1
  55. evalscope/utils/logger.py +1 -0
  56. evalscope/utils/model_utils.py +5 -2
  57. evalscope/version.py +2 -2
  58. {evalscope-0.9.0.dist-info → evalscope-0.10.1.dist-info}/METADATA +84 -7
  59. {evalscope-0.9.0.dist-info → evalscope-0.10.1.dist-info}/RECORD +66 -51
  60. tests/cli/test_collection.py +11 -7
  61. tests/cli/test_run.py +13 -4
  62. evalscope/tools/__init__.py +0 -1
  63. evalscope/tools/combine_reports.py +0 -133
  64. evalscope/tools/gen_mmlu_subject_mapping.py +0 -90
  65. /evalscope/{tools/rewrite_eval_results.py → models/custom/dummy_model.py} +0 -0
  66. {evalscope-0.9.0.dist-info → evalscope-0.10.1.dist-info}/LICENSE +0 -0
  67. {evalscope-0.9.0.dist-info → evalscope-0.10.1.dist-info}/WHEEL +0 -0
  68. {evalscope-0.9.0.dist-info → evalscope-0.10.1.dist-info}/entry_points.txt +0 -0
  69. {evalscope-0.9.0.dist-info → evalscope-0.10.1.dist-info}/top_level.txt +0 -0
evalscope/perf/main.py CHANGED
@@ -1,5 +1,4 @@
 import asyncio
-import logging
 import os
 import platform
 from argparse import Namespace
evalscope/perf/plugin/api/custom_api.py CHANGED
@@ -1,5 +1,4 @@
 import json
-from transformers import AutoTokenizer
 from typing import Any, Dict, Iterator, List

 from evalscope.perf.arguments import Arguments
@@ -25,6 +24,7 @@ class CustomPlugin(ApiPluginBase):
         """
         super().__init__(model_path=mode_path)
         if mode_path is not None:
+            from transformers import AutoTokenizer
             self.tokenizer = AutoTokenizer.from_pretrained(mode_path)
         else:
             self.tokenizer = None
evalscope/perf/plugin/api/openai_api.py CHANGED
@@ -1,6 +1,5 @@
 import json
 import os
-from transformers import AutoTokenizer
 from typing import Any, Dict, Iterator, List, Union

 from evalscope.perf.arguments import Arguments
@@ -25,6 +24,7 @@ class OpenaiPlugin(ApiPluginBase):
         """
         super().__init__(model_path=mode_path)
         if mode_path is not None:
+            from transformers import AutoTokenizer
             self.tokenizer = AutoTokenizer.from_pretrained(mode_path)
         else:
             self.tokenizer = None
evalscope/perf/plugin/datasets/flickr8k.py CHANGED
@@ -1,6 +1,5 @@
 import base64
 from io import BytesIO
-from modelscope.msdatasets import MsDataset
 from PIL import Image
 from typing import Any, Dict, Iterator, List

@@ -26,6 +25,7 @@ class FlickrDatasetPlugin(DatasetPluginBase):
         super().__init__(query_parameters)

     def build_messages(self) -> Iterator[List[Dict]]:
+        from modelscope.msdatasets import MsDataset
         dataset = MsDataset.load('clip-benchmark/wds_flickr8k', split='test')

         for item in dataset:
evalscope/perf/plugin/datasets/longalpaca.py CHANGED
@@ -1,4 +1,3 @@
-from modelscope import MsDataset
 from typing import Any, Dict, Iterator, List

 from evalscope.perf.arguments import Arguments
@@ -17,6 +16,7 @@ class LongAlpacaDatasetPlugin(DatasetPluginBase):

     def build_messages(self) -> Iterator[List[Dict]]:
         if not self.query_parameters.dataset_path:
+            from modelscope import MsDataset
             ds = MsDataset.load('AI-ModelScope/LongAlpaca-12k', subset_name='default', split='train')
         else:
             ds = self.dataset_json_list(self.query_parameters.dataset_path)
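
The four perf plugin diffs above (custom_api.py, openai_api.py, flickr8k.py, longalpaca.py) apply the same change: the heavyweight optional dependencies transformers and modelscope are no longer imported at module load time, only inside the branch that actually needs them. A minimal sketch of that deferred-import pattern, using a hypothetical ExamplePlugin class for illustration (only the import placement mirrors the real change):

    from typing import Optional

    class ExamplePlugin:

        def __init__(self, model_path: Optional[str] = None):
            if model_path is not None:
                # Import transformers only when a local tokenizer is actually
                # requested, so the module stays importable without transformers.
                from transformers import AutoTokenizer
                self.tokenizer = AutoTokenizer.from_pretrained(model_path)
            else:
                self.tokenizer = None

The trade-off is that the perf plugins stay importable (and faster to load) when transformers or modelscope is not installed, while any ImportError is deferred to the first call that needs the dependency.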
evalscope/report/__init__.py ADDED
@@ -0,0 +1,5 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+from evalscope.report.combinator import gen_table, get_data_frame, get_report_list
+from evalscope.report.generator import ReportGenerator
+from evalscope.report.utils import Category, Report, ReportKey, Subset
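
The new evalscope/report/__init__.py re-exports the report API at the package level, so callers can import these names directly from evalscope.report rather than from the individual submodules. For illustration only (the names come from the hunk above; call signatures are not part of this diff):

    from evalscope.report import Report, ReportGenerator, gen_table, get_report_list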