PyPI - evalscope - Versions diffs - 1.0.2__py3-none-any.whl → 1.1.0__py3-none-any.whl - Mend

evalscope-1.0.2-py3-none-any.whl → evalscope-1.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of evalscope might be problematic. Click here for more details.

Files changed (87)

evalscope/api/benchmark/adapters/default_data_adapter.py +12 -0
evalscope/app/ui/multi_model.py +6 -1
evalscope/app/ui/single_model.py +8 -2
evalscope/app/utils/data_utils.py +3 -2
evalscope/app/utils/visualization.py +2 -2
evalscope/benchmarks/ai2d/ai2d_adapter.py +3 -2
evalscope/benchmarks/bfcl/bfcl_adapter.py +10 -45
evalscope/benchmarks/blink/blink_adapter.py +61 -0
evalscope/benchmarks/chartqa/__init__.py +0 -0
evalscope/benchmarks/chartqa/chartqa_adapter.py +80 -0
evalscope/benchmarks/chartqa/utils.py +38 -0
evalscope/benchmarks/docvqa/__init__.py +0 -0
evalscope/benchmarks/docvqa/docvqa_adapter.py +67 -0
evalscope/benchmarks/general_arena/utils.py +2 -1
evalscope/benchmarks/hle/hle_adapter.py +3 -2
evalscope/benchmarks/infovqa/__init__.py +0 -0
evalscope/benchmarks/infovqa/infovqa_adapter.py +66 -0
evalscope/benchmarks/mm_bench/mm_bench_adapter.py +2 -2
evalscope/benchmarks/mmmu/mmmu_adapter.py +1 -1
evalscope/benchmarks/ocr_bench/__init__.py +0 -0
evalscope/benchmarks/ocr_bench/ocr_bench_adapter.py +101 -0
evalscope/benchmarks/ocr_bench_v2/IoUscore_metric.py +87 -0
evalscope/benchmarks/ocr_bench_v2/TEDS_metric.py +963 -0
evalscope/benchmarks/ocr_bench_v2/__init__.py +0 -0
evalscope/benchmarks/ocr_bench_v2/ocr_bench_v2_adapter.py +161 -0
evalscope/benchmarks/ocr_bench_v2/page_ocr_metric.py +50 -0
evalscope/benchmarks/ocr_bench_v2/parallel.py +46 -0
evalscope/benchmarks/ocr_bench_v2/spotting_eval/__init__.py +0 -0
evalscope/benchmarks/ocr_bench_v2/spotting_eval/readme.txt +26 -0
evalscope/benchmarks/ocr_bench_v2/spotting_eval/rrc_evaluation_funcs_1_1.py +537 -0
evalscope/benchmarks/ocr_bench_v2/spotting_eval/script.py +481 -0
evalscope/benchmarks/ocr_bench_v2/spotting_metric.py +179 -0
evalscope/benchmarks/ocr_bench_v2/utils.py +432 -0
evalscope/benchmarks/ocr_bench_v2/vqa_metric.py +254 -0
evalscope/metrics/metric.py +51 -0
evalscope/metrics/metrics.py +16 -0
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py +0 -0
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/dist_utils.py +0 -0
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py +0 -0
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py +0 -0
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py +0 -0
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py +0 -0
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/utils.py +0 -0
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/__init__.py +0 -0
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py +0 -0
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py +0 -0
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py +2 -6
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py +2 -6
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py +2 -6
evalscope/report/__init__.py +9 -1
evalscope/report/combinator.py +52 -2
evalscope/utils/json_schema.py +8 -6
evalscope/utils/multi_choices.py +16 -1
evalscope/version.py +2 -2
{evalscope-1.0.2.dist-info → evalscope-1.1.0.dist-info}/METADATA +6 -32
{evalscope-1.0.2.dist-info → evalscope-1.1.0.dist-info}/RECORD +51 -54
{evalscope-1.0.2.dist-info → evalscope-1.1.0.dist-info}/top_level.txt +0 -1
tests/__init__.py +0 -1
tests/benchmark/__init__.py +0 -1
tests/benchmark/test_eval.py +0 -429
tests/benchmark/test_image_edit.py +0 -65
tests/benchmark/test_sandbox.py +0 -81
tests/benchmark/test_t2i.py +0 -142
tests/benchmark/test_vlm.py +0 -137
tests/cli/__init__.py +0 -1
tests/cli/test_all.py +0 -269
tests/cli/test_collection.py +0 -99
tests/cli/test_custom.py +0 -268
tests/cli/test_reasoning.py +0 -81
tests/common.py +0 -73
tests/perf/__init__.py +0 -1
tests/perf/test_perf.py +0 -206
tests/rag/test_clip_benchmark.py +0 -87
tests/rag/test_mteb.py +0 -213
tests/rag/test_ragas.py +0 -128
tests/swift/__init__.py +0 -1
tests/swift/test_run_swift_eval.py +0 -146
tests/swift/test_run_swift_vlm_eval.py +0 -128
tests/swift/test_run_swift_vlm_jugde_eval.py +0 -157
tests/test_run_all.py +0 -12
tests/utils.py +0 -13
tests/vlm/__init__.py +0 -1
tests/vlm/test_vlmeval.py +0 -102
{tests/rag → evalscope/benchmarks/blink}/__init__.py +0 -0
{evalscope-1.0.2.dist-info → evalscope-1.1.0.dist-info}/LICENSE +0 -0
{evalscope-1.0.2.dist-info → evalscope-1.1.0.dist-info}/WHEEL +0 -0
{evalscope-1.0.2.dist-info → evalscope-1.1.0.dist-info}/entry_points.txt +0 -0

evalscope/metrics/metric.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import json
 from collections import defaultdict
 from typing import List
@@ -100,6 +101,56 @@ class MultiChoiceAcc(Metric):
         return res
+@register_metric(name='anls')
+class ANLS(Metric):
+    def __init__(self, thresh_hold=0.5):
+        self.thresh_hold = thresh_hold
+    def apply(self, predictions, references):
+        """
+        Calculate ANLS (Average Normalized Levenshtein Similarity) for a list of predictions and references.
+        This implementation is adapted from
+        https://github.com/QwenLM/Qwen-VL/blob/master/eval_mm/infographicsvqa_eval.py
+        Args:
+            references (List[str]): List of correct answers. Each answer can be a string of json.
+            predictions (List[str]): List of predicted answers.
+        """
+        from .metrics import levenshtein_distance
+        res = []
+        # Unwrap predictions if it's a nested list
+        for prediction, reference in zip(predictions, references):
+            # Parse the reference which is a json string
+            try:
+                answer = json.loads(reference)
+            except json.JSONDecodeError:
+                answer = reference
+            if isinstance(answer, str):
+                answer = [answer]
+            assert isinstance(answer, list), 'The reference answer should be a list of answers.'
+            # Calculate ANLS for each reference answer
+            values = []
+            for ans in answer:
+                # preprocess both the answers - gt and prediction
+                gt_answer = ' '.join(ans.strip().lower().split())
+                det_answer = ' '.join(prediction.strip().lower().split())
+                dist = levenshtein_distance(gt_answer, det_answer)
+                length = max(len(ans.upper()), len(prediction.upper()))
+                values.append(0.0 if length == 0 else float(dist) / float(length))
+            question_result = 0.0
+            if values:
+                question_result = 1 - min(values)
+                if question_result < self.thresh_hold:
+                    question_result = 0.0
+            res.append(question_result)
+        return res
 # ##################
 # T2I Metrics ######
 ####################

evalscope/metrics/metrics.py CHANGED Viewed

@@ -467,3 +467,19 @@ def calculate_pass_at_k(
         num_samples_it = iter(num_samples)
     return np.array([estimator(int(n), int(c), k) for n, c in zip(num_samples_it, num_correct)])
+def levenshtein_distance(s1, s2):
+    if len(s1) > len(s2):
+        s1, s2 = s2, s1
+    distances = range(len(s1) + 1)
+    for i2, c2 in enumerate(s2):
+        distances_ = [i2 + 1]
+        for i1, c1 in enumerate(s1):
+            if c1 == c2:
+                distances_.append(distances[i1])
+            else:
+                distances_.append(1 + min((distances[i1], distances[i1 + 1], distances_[-1])))
+        distances = distances_
+    return distances[-1]

evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py CHANGED Viewed

File without changes

evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/dist_utils.py CHANGED Viewed

File without changes

evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py CHANGED Viewed

File without changes

evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py CHANGED Viewed

File without changes

evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py CHANGED Viewed

File without changes

evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py CHANGED Viewed

File without changes

evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/utils.py CHANGED Viewed

File without changes

evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/__init__.py CHANGED Viewed

File without changes

evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py CHANGED Viewed

File without changes

evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py CHANGED Viewed

File without changes

evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py CHANGED Viewed

@@ -30,13 +30,9 @@ from transformers.modeling_outputs import (
     SequenceClassifierOutput,
     TokenClassifierOutput,
 )
-from transformers.modeling_utils import (
-    PreTrainedModel,
-    apply_chunking_to_forward,
-    find_pruneable_heads_and_indices,
-    prune_linear_layer,
-)
+from transformers.modeling_utils import PreTrainedModel
 from transformers.models.bert.configuration_bert import BertConfig
+from transformers.pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer
 from transformers.utils import logging
 from typing import Any, Dict, Optional, Tuple

evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py CHANGED Viewed

@@ -14,13 +14,9 @@ from transformers.modeling_outputs import (
     BaseModelOutputWithPastAndCrossAttentions,
     BaseModelOutputWithPoolingAndCrossAttentions,
 )
-from transformers.modeling_utils import (
-    PreTrainedModel,
-    apply_chunking_to_forward,
-    find_pruneable_heads_and_indices,
-    prune_linear_layer,
-)
+from transformers.modeling_utils import PreTrainedModel
 from transformers.models.bert.configuration_bert import BertConfig
+from transformers.pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer
 from transformers.utils import logging
 from typing import Tuple

evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py CHANGED Viewed

@@ -31,13 +31,9 @@ from transformers.modeling_outputs import (
     SequenceClassifierOutput,
     TokenClassifierOutput,
 )
-from transformers.modeling_utils import (
-    PreTrainedModel,
-    apply_chunking_to_forward,
-    find_pruneable_heads_and_indices,
-    prune_linear_layer,
-)
+from transformers.modeling_utils import PreTrainedModel
 from transformers.models.bert.configuration_bert import BertConfig
+from transformers.pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer
 from transformers.utils import logging
 from typing import Optional, Tuple

evalscope/report/__init__.py CHANGED Viewed

@@ -4,7 +4,13 @@ from typing import TYPE_CHECKING
 from evalscope.utils.import_utils import _LazyModule
 if TYPE_CHECKING:
-    from .combinator import gen_table, get_data_frame, get_report_list
+    from .combinator import (
+        gen_table,
+        get_data_frame,
+        get_report_list,
+        unweighted_average_from_subsets,
+        weighted_average_from_subsets,
+    )
     from .generator import ReportGenerator
     from .report import Category, Report, ReportKey, Subset
@@ -14,6 +20,8 @@ else:
             'gen_table',
             'get_data_frame',
             'get_report_list',
+            'weighted_average_from_subsets',
+            'unweighted_average_from_subsets',
         ],
         'generator': [
             'ReportGenerator',

evalscope/report/combinator.py CHANGED Viewed

@@ -4,9 +4,9 @@ import glob
 import os
 import pandas as pd
 from tabulate import tabulate
-from typing import List, Tuple
+from typing import Dict, List, Tuple, Union
-from evalscope.report.report import Report
+from evalscope.report.report import Report, Subset
 from evalscope.utils.logger import get_logger
 logger = get_logger()
@@ -86,3 +86,53 @@ def gen_table(
         add_overall_metric=add_overall_metric
     )
     return tabulate(table, headers=table.columns, tablefmt='grid', showindex=False)
+def weighted_average_from_subsets(
+    subset_names: List[str], subset_dict: Dict[str, Subset], new_name: str = ''
+) -> Subset:
+    """Calculate weighted average for given subsets.
+    Args:
+        subset_names (List[str]): List of subset names to include in the average.
+        subset_dict (Dict[str, Subset]): Dictionary mapping subset names to Subset objects.
+        new_name (str): Name for the resulting Subset object.
+    Returns:
+        Subset: A new Subset object with weighted average score
+    """
+    total_score = 0
+    total_count = 0
+    for name in subset_names:
+        if name in subset_dict:
+            subset = subset_dict[name]
+            total_score += subset.score * subset.num
+            total_count += subset.num
+    weighted_avg = total_score / total_count if total_count > 0 else 0
+    return Subset(name=new_name, score=weighted_avg, num=total_count)
+def unweighted_average_from_subsets(
+    subset_names: List[str], subset_dict: Dict[str, Subset], new_name: str = ''
+) -> Subset:
+    """Calculate unweighted average for given subsets.
+    Args:
+        subset_names (List[str]): List of subset names to include in the average.
+        subset_dict (Dict[str, Subset]): Dictionary mapping subset names to Subset objects.
+        new_name (str): Name for the resulting Subset object.
+    Returns:
+        Subset: A new Subset object with unweighted average score
+    """
+    scores = []
+    total_count = 0
+    for name in subset_names:
+        if name in subset_dict:
+            subset = subset_dict[name]
+            scores.append(subset.score)
+            total_count += subset.num
+    unweighted_avg = sum(scores) / len(scores) if scores else 0
+    return Subset(name=new_name, score=unweighted_avg, num=total_count)

evalscope/utils/json_schema.py CHANGED Viewed

@@ -59,18 +59,20 @@ class JSONSchema(BaseModel):
     required: Optional[List[str]] = Field(default=None)
     """Required fields for object parameters."""
-    @field_validator('type')
-    def validate_type(cls, v: Optional[str]) -> Optional[JSONType]:
-        return python_type_to_json_type(v)
     @model_validator(mode='before')
     def convert_type_before_validation(cls, values):
         values = deepcopy(values)
         def recursive_convert_type(obj):
             if isinstance(obj, dict):
-                if 'type' in obj:
-                    obj['type'] = python_type_to_json_type(obj['type'])
+                # Convert 'type' field if it's a string
+                if 'type' in obj and isinstance(obj['type'], str):
+                    try:
+                        obj['type'] = python_type_to_json_type(obj['type'])
+                    except ValueError:
+                        # If conversion fails, leave it as is
+                        pass
+                # Recursively process nested structures
                 for k, v in obj.items():
                     obj[k] = recursive_convert_type(v)
             elif isinstance(obj, list):

evalscope/utils/multi_choices.py CHANGED Viewed

@@ -81,12 +81,27 @@ def answer_options(choices: Choices) -> str:
     return '\n'.join([f'{answer_character(i)}) {choices[j].value}' for i, j in enumerate(indexes)])
+def format_letter_choices(choices: Union[Choices, List[str]]) -> str:
+    """
+    Returns the `choices` formatted as a letter list, e.g.:
+    ["choice 1", "choice 2", "choice 3"] ->
+        "A,B,C"
+    """
+    if isinstance(choices, list):
+        choices = Choices(choices)
+    indexes = list(range(len(choices)))
+    return ','.join([f'{answer_character(i)}' for i in indexes])
 def prompt(question: str, choices: Union[Choices, List[str]], template: str, fewshot: Optional[str] = None) -> str:
     if isinstance(choices, list):
         choices = Choices(choices)
     choices_text = answer_options(choices)
-    letters = ','.join(answer_character(i) for i in range(len(choices)))
+    letters = format_letter_choices(choices)
     if not fewshot:
         return template.format(
             choices=choices_text,

evalscope/version.py CHANGED Viewed

@@ -1,4 +1,4 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
-__version__ = '1.0.2'
-__release_datetime__ = '2025-09-23 18:00:00'
+__version__ = '1.1.0'
+__release_datetime__ = '2025-10-14 14:00:00'

{evalscope-1.0.2.dist-info → evalscope-1.1.0.dist-info}/METADATA RENAMED Viewed

@@ -1,11 +1,11 @@
 Metadata-Version: 2.1
 Name: evalscope
-Version: 1.0.2
+Version: 1.1.0
 Summary: EvalScope: Lightweight LLMs Evaluation Framework
-Home-page: https://github.com/modelscope/evalscope
 Author: ModelScope team
 Author-email: contact@modelscope.cn
 License: Apache License 2.0
+Project-URL: Homepage, https://github.com/modelscope/evalscope
 Keywords: python,llm,evaluation
 Classifier: Development Status :: 4 - Beta
 Classifier: Operating System :: OS Independent
@@ -14,6 +14,7 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Classifier: License :: OSI Approved :: Apache Software License
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
@@ -56,35 +57,6 @@ Requires-Dist: peft>=0.17; extra == "aigc"
 Requires-Dist: torch; extra == "aigc"
 Requires-Dist: torchvision; extra == "aigc"
 Provides-Extra: all
-Requires-Dist: colorlog; extra == "all"
-Requires-Dist: datasets==3.6.0; extra == "all"
-Requires-Dist: docstring-parser; extra == "all"
-Requires-Dist: dotenv; extra == "all"
-Requires-Dist: jieba; extra == "all"
-Requires-Dist: jsonlines; extra == "all"
-Requires-Dist: langdetect; extra == "all"
-Requires-Dist: latex2sympy2-extended[antlr4_9_3]; extra == "all"
-Requires-Dist: matplotlib; extra == "all"
-Requires-Dist: modelscope[framework]>=1.27; extra == "all"
-Requires-Dist: nltk>=3.9; extra == "all"
-Requires-Dist: openai; extra == "all"
-Requires-Dist: overrides; extra == "all"
-Requires-Dist: pandas; extra == "all"
-Requires-Dist: pillow; extra == "all"
-Requires-Dist: pydantic; extra == "all"
-Requires-Dist: pyyaml>=5.1; extra == "all"
-Requires-Dist: requests; extra == "all"
-Requires-Dist: rich; extra == "all"
-Requires-Dist: rouge-chinese; extra == "all"
-Requires-Dist: rouge-score>=0.1.0; extra == "all"
-Requires-Dist: sacrebleu; extra == "all"
-Requires-Dist: scikit-learn; extra == "all"
-Requires-Dist: seaborn; extra == "all"
-Requires-Dist: sympy; extra == "all"
-Requires-Dist: tabulate; extra == "all"
-Requires-Dist: tqdm; extra == "all"
-Requires-Dist: transformers>=4.33; extra == "all"
-Requires-Dist: word2number; extra == "all"
 Requires-Dist: ms-opencompass>=0.1.6; extra == "all"
 Requires-Dist: ms-vlmeval>=0.0.17; extra == "all"
 Requires-Dist: langchain<0.4.0,>=0.3.0; extra == "all"
@@ -99,6 +71,7 @@ Requires-Dist: aiohttp; extra == "all"
 Requires-Dist: fastapi; extra == "all"
 Requires-Dist: jinja2; extra == "all"
 Requires-Dist: numpy; extra == "all"
+Requires-Dist: rich; extra == "all"
 Requires-Dist: sse-starlette; extra == "all"
 Requires-Dist: transformers; extra == "all"
 Requires-Dist: uvicorn; extra == "all"
@@ -266,7 +239,8 @@ Please scan the QR code below to join our community groups:
 > **Version 1.0 Refactoring**
 >
 > Version 1.0 introduces a major overhaul of the evaluation framework, establishing a new, more modular and extensible API layer under `evalscope/api`. Key improvements include standardized data models for benchmarks, samples, and results; a registry-based design for components such as benchmarks and metrics; and a rewritten core evaluator that orchestrates the new architecture. Existing benchmark adapters have been migrated to this API, resulting in cleaner, more consistent, and easier-to-maintain implementations.
+- 🔥 **[2025.10.14]** Added support for OCRBench, OCRBench-v2, DocVQA, InfoVQA, ChartQA, and BLINK multimodal image-text evaluation benchmarks.
+- 🔥 **[2025.09.22]** Code evaluation benchmarks (HumanEval, LiveCodeBench) now support running in a sandbox environment. To use this feature, please install [ms-enclave](https://github.com/modelscope/ms-enclave) first.
 - 🔥 **[2025.09.19]** Added support for multimodal image-text evaluation benchmarks including RealWorldQA, AI2D, MMStar, MMBench, and OmniBench, as well as pure text evaluation benchmarks such as Multi-IF, HealthBench, and AMC.
 - 🔥 **[2025.09.05]** Added support for vision-language multimodal model evaluation tasks, such as MathVista and MMMU. For more supported datasets, please [refer to the documentation](https://evalscope.readthedocs.io/zh-cn/latest/get_started/supported_dataset/vlm.html).
 - 🔥 **[2025.09.04]** Added support for image editing task evaluation, including the [GEdit-Bench](https://modelscope.cn/datasets/stepfun-ai/GEdit-Bench) benchmark. For usage instructions, refer to the [documentation](https://evalscope.readthedocs.io/en/latest/user_guides/aigc/image_edit.html).

{evalscope-1.0.2.dist-info → evalscope-1.1.0.dist-info}/RECORD RENAMED Viewed

@@ -4,14 +4,14 @@ evalscope/config.py,sha256=S2N11-AxQkT7lVffpjXdtpT4QpnSP6th-c8I-501mwM,11507
 evalscope/constants.py,sha256=W3E4Jp-x6qxvPOYtU9bNlzlERFvSAA_3F007apIwUlU,3601
 evalscope/run.py,sha256=A9_7pR3FiA-It46A3Mqk7ce6fQy548p0ux2QUugj2hI,6531
 evalscope/summarizer.py,sha256=HUDJ1zKi22uNst3AUfX67Z0sHzeZy-4S8sYyvxJnBzc,5901
-evalscope/version.py,sha256=H_zHGJkiB6equdW6Jo4F_hhdLYKZqriowav05O5_CeY,118
+evalscope/version.py,sha256=hqGJMtjd3F6yPJucqhuYtXuGYSumthFmroHsUTY761Y,118
 evalscope/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 evalscope/api/registry.py,sha256=Qk0KMGDbt-iI0-OfoJZbOtxt76qreAVWh36HOoQAKM4,5448
 evalscope/api/benchmark/__init__.py,sha256=9xcTxpcQ6HhZ0QDwEIZhAT5IjybzaJ60VGLcmaFE5dU,188
 evalscope/api/benchmark/benchmark.py,sha256=gqAM81SeGb_Q0rA6Q-LFpnNkOUiwOj43aRWECtCxAOE,10832
 evalscope/api/benchmark/meta.py,sha256=N4u8NQjkjIw-xaf6KFnb6C8JDKB0DLbsXyXblDqIpvE,4304
 evalscope/api/benchmark/adapters/__init__.py,sha256=uLt_GiU4s-_6Rjgmr4OUTtE7dvEX-ZIQ403fd6oNuxA,264
-evalscope/api/benchmark/adapters/default_data_adapter.py,sha256=WS4Pm0pk51Se196Ho31FmOqGyOajTtUGbbjWD9U7UwU,28064
+evalscope/api/benchmark/adapters/default_data_adapter.py,sha256=RWDweSmXKGv5hPPjeV4VF76gbKqYJEsab_lQYGUM2PA,28785
 evalscope/api/benchmark/adapters/image_edit_adapter.py,sha256=06V-_A8RKuMNYMt7-vaXn2qBa9LIZgfFO_6PUuhAkh0,3052
 evalscope/api/benchmark/adapters/multi_choice_adapter.py,sha256=auqLNvF50Or9bo3LOmQLXHfFaTTCTqvQzZog3glInng,3062
 evalscope/api/benchmark/adapters/text2image_adapter.py,sha256=jO64hwjQexIv-MTyHH0Ffp_6p--9TKufOmX_U39mAnE,6385
@@ -50,15 +50,15 @@ evalscope/app/arguments.py,sha256=1wHTLeFx1G94cKXYOeOVe_wTiOY2D929UctIRGOtRaQ,69
 evalscope/app/constants.py,sha256=oG6tZ618zJcCnwZ5THnYL0gWTPDb5XKrnmdrWxY3Z4Q,385
 evalscope/app/ui/__init__.py,sha256=IBxyQ2H-kSHoHJmXWDR8QMermvsMbiu673PQbXP_FnE,616
 evalscope/app/ui/app_ui.py,sha256=wLrQ4VM7BnzvaYmPAk8NH9t5BaWooHFJcgmAOOd2I1w,2032
-evalscope/app/ui/multi_model.py,sha256=fO8z-ZFucWtgaKmuQ50AkUp4BoYOFqOkxeTBUUAK0bM,15122
+evalscope/app/ui/multi_model.py,sha256=mvMgpgiJGRrNRtReFcD_PiLatq-81zp65Vb3JYUP3PE,15356
 evalscope/app/ui/sidebar.py,sha256=JA0QbG2iPStK-lFy6x_AjOHlQdesmgXoS0OYJUJ_Wyg,1339
-evalscope/app/ui/single_model.py,sha256=1rgYrJOO75fJG2pa74tzEocO_91jXOAKFQAUViBcYFk,9459
+evalscope/app/ui/single_model.py,sha256=zFt1uDYrcgNJ7e_YLigrs6IXT3jyGMVn-7rv4CHAZvE,9741
 evalscope/app/ui/visualization.py,sha256=jXFX_-7woQkcAiQkPAIRwVv1kdRdXonn9IvmB8yzPDU,1102
-evalscope/app/utils/data_utils.py,sha256=m7Z0Us_josUFseI8VJpIp8QaYeLnu91E2HCZ8WSB07E,7396
+evalscope/app/utils/data_utils.py,sha256=GYOfkh0NoueeX3od-L852Q9C9SSkEFlW_40wjPa5b9w,7470
 evalscope/app/utils/env_utils.py,sha256=2pmz4uNun-XNP6TqM6Oe576XopweEClhBaIdWO--kd0,382
 evalscope/app/utils/localization.py,sha256=rWEviBmcnhIpAA-cG8djbbUA6p1Y358c0dxge5Pqi1U,6131
 evalscope/app/utils/text_utils.py,sha256=-K-hRPMZ29Yqjhzd-391gPaD4B4wUuIg71PfbLnGJ38,3754
-evalscope/app/utils/visualization.py,sha256=dwEXbGfY7vFysnL0HmrHS2BEWaJkg-dZ9ayDlRhdvv4,3559
+evalscope/app/utils/visualization.py,sha256=lycwcr-kFT2FKVw6iWMh3iD_n4dqpWVzhXMLDnkN8QY,3563
 evalscope/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 evalscope/backend/base.py,sha256=qYu8Shokrtrx-N6T_BAJk_6OCpovUBYuN0p3wngt-dw,1030
 evalscope/backend/opencompass/__init__.py,sha256=UP_TW5KBq6V_Nvqkeb7PGvGGX3rVYussT43npwCwDgE,135
@@ -109,7 +109,7 @@ evalscope/backend/vlm_eval_kit/__init__.py,sha256=R-GuBm8dAwvDF73XHaGpPSjlt7Y4ty
 evalscope/backend/vlm_eval_kit/backend_manager.py,sha256=jlwM13Ty-Ax6AeMsNlo9xIBupNFgnceYuXtCmh0hNTQ,6160
 evalscope/benchmarks/__init__.py,sha256=WHR4ej9Tqa2N9CyIaUWXS8EnHZtcujaNeg9hf8GT31Y,1182
 evalscope/benchmarks/ai2d/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-evalscope/benchmarks/ai2d/ai2d_adapter.py,sha256=3GBNV4cNv9bBLJRdG_uA9qNhuN6qAEutHl8d-rsFpFU,2018
+evalscope/benchmarks/ai2d/ai2d_adapter.py,sha256=qnQT2E0ZG8g4noOafu-QvBOKm-zEJ5X08QHw3ekNa4w,2473
 evalscope/benchmarks/aime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 evalscope/benchmarks/aime/aime24_adapter.py,sha256=HTlriHoHzlm1Rf3KAiGRLs8sx6Gyf6s7RGtOjk_hGS4,1767
 evalscope/benchmarks/aime/aime25_adapter.py,sha256=ZOE_6Zhg1MatWJSu2Zq372nKUODYtNFZimS1MJRFz5A,1591
@@ -152,10 +152,15 @@ evalscope/benchmarks/bbh/cot_prompts/tracking_shuffled_objects_three_objects.txt
 evalscope/benchmarks/bbh/cot_prompts/web_of_lies.txt,sha256=s_x6u5MLeKpuAHZj3GNQqY1I8vWqQIfJasOp9XcM7Ck,2945
 evalscope/benchmarks/bbh/cot_prompts/word_sorting.txt,sha256=qfTZafCzNiz9ULBaDlfy_LISL617NyH5Nc0-nO0K0LE,2164
 evalscope/benchmarks/bfcl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-evalscope/benchmarks/bfcl/bfcl_adapter.py,sha256=WzpL7XWDdx-EvbLluIOiMlADTO42CYs0IwQFvIfhTI0,18402
+evalscope/benchmarks/bfcl/bfcl_adapter.py,sha256=ZmwGylqXCAcpJ8glQmj7HkDa8OqE9KODiHvWelTGLIo,17033
 evalscope/benchmarks/bfcl/generation.py,sha256=c6lNjo-VTSUrVg-pqyPSucrbCKBOdBSyN0aR5AAtE4A,8701
+evalscope/benchmarks/blink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+evalscope/benchmarks/blink/blink_adapter.py,sha256=ocQKsDGwnUAg2si2p7tqIGeH3PKPqTSByjbt7ceraRo,2642
 evalscope/benchmarks/ceval/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
 evalscope/benchmarks/ceval/ceval_adapter.py,sha256=4FLPgY-UtqINafnNxfOsE9AwS6GFXFCUGOBI-4EZUGk,8503
+evalscope/benchmarks/chartqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+evalscope/benchmarks/chartqa/chartqa_adapter.py,sha256=DA1kthMUvn4_GUfdRfuR-au3RkhE3WKPnR_f8nlhd4c,2813
+evalscope/benchmarks/chartqa/utils.py,sha256=Ta9ZUMpIqzrAszju7_WOMBAlilH1Tx6TCheVpjrZJJI,1672
 evalscope/benchmarks/chinese_simple_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py,sha256=OWzRlSGswV24V-heLqqo7GQzpJp01TZ0DhFHq0iUP9A,8238
 evalscope/benchmarks/cmmlu/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
@@ -167,6 +172,8 @@ evalscope/benchmarks/data_collection/data_collection_adapter.py,sha256=eetF21dN5
 evalscope/benchmarks/docmath/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 evalscope/benchmarks/docmath/docmath_adapter.py,sha256=-mel6hA-x_e7fV0uOHdX5BpoQEVyQ5VqwIwEqSNDpnc,4623
 evalscope/benchmarks/docmath/utils.py,sha256=d6Yjoa5q91kjr1SdVPVBndzDaUzMlO_GfEqMtUXXr0s,7707
+evalscope/benchmarks/docvqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+evalscope/benchmarks/docvqa/docvqa_adapter.py,sha256=xGaayycILYoLd8r6wLLppDbU6Z1FdafbYFyjLHaftAA,2882
 evalscope/benchmarks/drop/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 evalscope/benchmarks/drop/drop_adapter.py,sha256=Jbbr5O_Y5LI_vT_RskRQVKxGkiIraX_uXP7fYaZ5eZs,9995
 evalscope/benchmarks/drop/utils.py,sha256=Z9PHrNnRfGqFHCLONg5SWKARp1eTJlHFc_bU46t_YrM,1344
@@ -175,7 +182,7 @@ evalscope/benchmarks/frames/frames_adapter.py,sha256=w1kRya7w5omt95HHE6AzbzYVhyT
 evalscope/benchmarks/frames/utils.py,sha256=gULWM6Rwv5bTSSWcDYp-iSIoWj8r5VtbQakhRzHJq8A,1172
 evalscope/benchmarks/general_arena/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 evalscope/benchmarks/general_arena/general_arena_adapter.py,sha256=DzJaokqZwR2L8HDiahss8EbQ3vcsMXkzkMghxU-uAOo,21639
-evalscope/benchmarks/general_arena/utils.py,sha256=zS4l1RKwvl0Z9Mk7kth9WVQGHTgE_aNDZa_XNy9tGyM,6874
+evalscope/benchmarks/general_arena/utils.py,sha256=p6pZfvdNCMOU_vWHm_DYU57Sa2WTDdFOkVBubblCRN4,6912
 evalscope/benchmarks/general_mcq/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 evalscope/benchmarks/general_mcq/general_mcq_adapter.py,sha256=7VKg_EzXkRvoWpR7h8qB4sVVb1eZHCGcPk-X_NMS5tE,2062
 evalscope/benchmarks/general_qa/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
@@ -191,7 +198,7 @@ evalscope/benchmarks/healthbench/utils.py,sha256=M8SnOEhlqXWm03CFE6CAtbMiu6MqdGg
 evalscope/benchmarks/hellaswag/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
 evalscope/benchmarks/hellaswag/hellaswag_adapter.py,sha256=tAe63NfV5ljUm1f4RTSFxWOVKBUhk3Cc0EGzF5uYLK4,2041
 evalscope/benchmarks/hle/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-evalscope/benchmarks/hle/hle_adapter.py,sha256=4YVmETL9mEiLxF4vWRjePLyFaxelax6nOaqoAH5ZxmU,6389
+evalscope/benchmarks/hle/hle_adapter.py,sha256=kJP7bzIDbr82GKi0FTy2zf_j1UWNBfuXYzokYJ-S9WE,6410
 evalscope/benchmarks/humaneval/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
 evalscope/benchmarks/humaneval/humaneval_adapter.py,sha256=uLs3UHSALS3YHt0qzBismrIqdEUgbEalQbjC0CU7ym4,4085
 evalscope/benchmarks/humaneval/utils.py,sha256=rPnc_JuSjNg9aV7UMUwsLrDlm-ufj64GNIBCWBeuRcM,6517
@@ -206,6 +213,8 @@ evalscope/benchmarks/image_edit/gedit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCe
 evalscope/benchmarks/image_edit/gedit/gedit_adapter.py,sha256=a6hhRbnGCvMEMsbnSbczjXd4vHfMVEnFfP459FCF_Mc,5250
 evalscope/benchmarks/image_edit/gedit/utils.py,sha256=UN0z9Dafs8d8lEXqxin321d8smiS3H9p3gyLkZFPFNg,14735
 evalscope/benchmarks/image_edit/gedit/vie_prompts.py,sha256=qVXWQyVUwZxEasDjVmYBk30_JI4gnvHacMOmMsA4wcI,22056
+evalscope/benchmarks/infovqa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+evalscope/benchmarks/infovqa/infovqa_adapter.py,sha256=3m_EvfRZ5ItHkz-3mVlsF_NnPS7NH1-EXwUW-s4VMxA,2617
 evalscope/benchmarks/iquiz/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 evalscope/benchmarks/iquiz/iquiz_adapter.py,sha256=mNHA_Fuj_gAdOEoR7oChnGmErf1czqwnk8Zk-jRhBys,1304
 evalscope/benchmarks/live_code_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -226,7 +235,7 @@ evalscope/benchmarks/math_vista/math_vista_adapter.py,sha256=Mu9BpH0rDNM0yMrGws4
 evalscope/benchmarks/minerva_math/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 evalscope/benchmarks/minerva_math/minerva_math_adapter.py,sha256=jyT9_D4w8PTtLBN3Kn10_CnssH_mPuRNnn9rek_zUEs,1655
 evalscope/benchmarks/mm_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-evalscope/benchmarks/mm_bench/mm_bench_adapter.py,sha256=py0DakGQX1JE2rqYjYN9w_-H0DtQ-YqG5k2s_UzbxxU,4372
+evalscope/benchmarks/mm_bench/mm_bench_adapter.py,sha256=ht2DVt_zEBJp4jvGy3myHHgdUUP9eff2O5BpIc9Fv74,4376
 evalscope/benchmarks/mm_star/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 evalscope/benchmarks/mm_star/mm_star_adapter.py,sha256=oamLv6U2-JAK5mdVLkUgYxkOahxQkQYMRKAyu_xPAUE,2818
 evalscope/benchmarks/mmlu/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
@@ -236,7 +245,7 @@ evalscope/benchmarks/mmlu_pro/mmlu_pro_adapter.py,sha256=GtIyUubUg6Q6Ydh1Adj0-32
 evalscope/benchmarks/mmlu_redux/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 evalscope/benchmarks/mmlu_redux/mmlu_redux_adapter.py,sha256=m_37OIFrJB4ZIvtbDJ_m9P9mA2QtrNjGfbbVo15awJg,7402
 evalscope/benchmarks/mmmu/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-evalscope/benchmarks/mmmu/mmmu_adapter.py,sha256=C7UM6HvomcA_Srf7771S0CaUvifBX63i161XaacraGQ,6038
+evalscope/benchmarks/mmmu/mmmu_adapter.py,sha256=WrykWq8n61CVrQ4XQhI3iEySgErHdZyng3udOL-Pddk,6054
 evalscope/benchmarks/mmmu_pro/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 evalscope/benchmarks/mmmu_pro/mmmu_pro_adapter.py,sha256=banPS1nDt9bQ95urKbSZnR-hBTw23eL9MSrHt_0ZLp0,4725
 evalscope/benchmarks/multi_if/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -248,6 +257,21 @@ evalscope/benchmarks/musr/musr_adapter.py,sha256=kx6bckj7Nijl4Wysuj-mKYdy0hIRDJh
 evalscope/benchmarks/needle_haystack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 evalscope/benchmarks/needle_haystack/needle_haystack_adapter.py,sha256=GYaswWPwYI3aV5HSpcuBTgW9-HDtf2xzNZg0WrsI0Yo,17033
 evalscope/benchmarks/needle_haystack/utils.py,sha256=k8WDigqt5LgzHw6DtaYsLtb3BJL0FTZS9JOyJCpoPq8,2935
+evalscope/benchmarks/ocr_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+evalscope/benchmarks/ocr_bench/ocr_bench_adapter.py,sha256=gkQb7g0-Lf5Sjemqs5kqogCLGFJI6YQv8-vGI1EbyLE,4392
+evalscope/benchmarks/ocr_bench_v2/IoUscore_metric.py,sha256=cBpRDJvI9f6vKRD4wTPv-8ThGddR3EhVobgjQQUAYlE,2606
+evalscope/benchmarks/ocr_bench_v2/TEDS_metric.py,sha256=31bL0V32Fq7prF1WoVjXmrmMdhg0qNcoiOaKykKOrZM,36528
+evalscope/benchmarks/ocr_bench_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+evalscope/benchmarks/ocr_bench_v2/ocr_bench_v2_adapter.py,sha256=QGY4R75UxDafIwSaOEPPuCaX3Z8BGoZVvcc6OWbeO9w,7976
+evalscope/benchmarks/ocr_bench_v2/page_ocr_metric.py,sha256=d1nU7LNwubBd_1rIe7i67hOVcJx5IUXkqVeqt1CQzak,1624
+evalscope/benchmarks/ocr_bench_v2/parallel.py,sha256=Q54wFSSRBp-kG2MhW4eOoXE1W9g-SDVhN8JuphDERsE,2029
+evalscope/benchmarks/ocr_bench_v2/spotting_metric.py,sha256=nftLaTOKEmqvSWr-c20f9hyyvNnd-Hg3E46KwqmkjLc,6149
+evalscope/benchmarks/ocr_bench_v2/utils.py,sha256=z9DSh2m1yvM3vsvxvqdHuPgRFxgdmEnzuNIuO7PAV3s,15914
+evalscope/benchmarks/ocr_bench_v2/vqa_metric.py,sha256=XkAiXk1uE7lsWQQXvjnHXZMsga8B9FVyq5qG8ghePK4,8980
+evalscope/benchmarks/ocr_bench_v2/spotting_eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+evalscope/benchmarks/ocr_bench_v2/spotting_eval/readme.txt,sha256=QO0K9z1ethy_lgs9vaxGN1u5DnPFsssp8z62Cni24iw,1424
+evalscope/benchmarks/ocr_bench_v2/spotting_eval/rrc_evaluation_funcs_1_1.py,sha256=qCuqDtsCfxAiQHYLNdHU7BQ9kLIZ9iyfmRxtIrGOBck,20349
+evalscope/benchmarks/ocr_bench_v2/spotting_eval/script.py,sha256=7HzM1PEw8wNOhmQOsZe582Y2rr4u66Q3JKVvvMasntE,19565
 evalscope/benchmarks/olympiad_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 evalscope/benchmarks/olympiad_bench/olympiad_bench_adapter.py,sha256=zePVmGjmyuwCWVb4h1PIQKAIFqBehwRwO2WOD0KX_ik,6565
 evalscope/benchmarks/olympiad_bench/utils.py,sha256=w7vEZcT3vCVq8_DSMgAjZPpVFVHStJPJYsPkrs-yOFM,21412
@@ -302,8 +326,8 @@ evalscope/filters/selection.py,sha256=yiJu2JjXDH_lgfEtB9umkGcA3zpo3zvnyoq2mKrXbn
 evalscope/metrics/__init__.py,sha256=1giVHESSjn98uBiAvYm5uLsmRQwmf9NHPSt7OT_QJss,1615
 evalscope/metrics/llm_judge.py,sha256=XukhH9PQtIZAcbjJlOmOD9ye3ngRv_IGKKJE9jhheOE,8653
 evalscope/metrics/math_parser.py,sha256=BMfautQtNNiF9f2DIEfO6SXSn_GYhzaddAjGWG10MJA,17257
-evalscope/metrics/metric.py,sha256=CabKKEbw_DptyH1ZQju7WzjB47fWUKdOhFB1ROpUC-4,10871
-evalscope/metrics/metrics.py,sha256=VxAggzEfaLKxWcXyuve8QbEBwV2W71udVyt0gynzGec,14134
+evalscope/metrics/metric.py,sha256=KNp_DNi9Ntq4my5G7La7AlP2Vj1p6hIgOheAh-4go5Q,12861
+evalscope/metrics/metrics.py,sha256=Y7TQ6MYaGE32EntTz-18CmQqYMpo1rQSvUiSwzBgpaQ,14599
 evalscope/metrics/rouge_metric.py,sha256=bqvSotuDdC0MEKmt8v6y6tBTBx0S3Ma-tfF-cMCckA4,4645
 evalscope/metrics/bundled_rouge_score/__init__.py,sha256=PwbTdk8168FwDJe_l8XIqDuBgZQooDsP31vj7di05Fs,650
 evalscope/metrics/bundled_rouge_score/rouge_scorer.py,sha256=T91PgJfi1As7BR7I-Hq6rLlvHAtMB9JpBw9gMTH8VlE,12114
@@ -381,9 +405,9 @@ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/__init__.py,sh
 evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/base_model.py,sha256=OOr1JD9kTlUGXZNG5b3kvkUaNz7QTmhaGoHhIKL69qo,7613
 evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/clip_vit.py,sha256=Ns7oM4KpKxWZTo8Lefe4EDFw-jzp5633zAArcWjoVZA,9772
 evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/eva_vit.py,sha256=KIF5tsiE7a5dbDfa-IKwzuzMUpuEAQPrm1nWFFtAeoI,20032
-evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py,sha256=uhaehowhTqRhQtq_dVCgF-9Iu4yU19AMxx2sJimYwlA,52711
+evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py,sha256=85ZvV2gKSnsbP5941PeJ-JJ4t8_lOYQe1EOxrHlIbNI,52728
 evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/vit.py,sha256=o5ykt3Q_WQlNmyxjQaS2-KPLGq1xqLZixNYam_Bs6NA,18701
-evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py,sha256=aBKdQQS7cHMPgYqIknCdHCZ7j2_QLACPn_jU_njiMIs,46840
+evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py,sha256=NPDpIRxjiroafZk5Z2uA9bC8Bi-yXY7um5HXxThF7N0,46857
 evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2.py,sha256=s7EkhtrIJ0LPUuLBArws8N23R1MoIoNaYUjwsbUqRkY,7994
 evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_image_text_matching.py,sha256=FnUyxxazEVaP69pAq9cig3j-mcX37BX-unPj0SVKUJI,3805
@@ -403,7 +427,7 @@ evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/bl
 evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_outputs.py,sha256=TOAI-KaUrtKjR1GNU_WwNXNpb9gGT-KX2FYe3muv_e0,4275
 evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_pretrain.py,sha256=-DprR09KYuwNEzEbhPvFRI3MR4_VdPMUGLPN6sL9Ym8,14625
 evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_vqa.py,sha256=S68U0DxWYGDmreRbH5yLDHBNN9PsczY9H0Uik0hO-ds,13872
-evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py,sha256=i1XlJe_PTSmiPkZKIhUXC_lc0-z2ewNYo4W1DvZQxjY,36678
+evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py,sha256=zv_WyHi67hvgHQ4DkZ8a4UoPcgrADKayqVtiIq-p3V4,36695
 evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/__init__.py,sha256=p67DDiFS-676z0z8jPj6NwXwNjEsqTXaXCh3g2UiDno,840
 evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/base_processor.py,sha256=LqMHlUTy2LEzoVwjALtrAw0UYmzIuHnFjQiVmn5nv-I,605
 evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/blip_processors.py,sha256=d4HInkL_Phk0Bgg2cWaOvhsPa6lkqDeovFW86PL0I18,6371
@@ -448,8 +472,8 @@ evalscope/perf/utils/handler.py,sha256=HyKIxbzC0XCyQanlbb7UEY7yaeqjJTePNea8kMV3S
 evalscope/perf/utils/local_server.py,sha256=_lSPlNEnOmPA_DtREgPS_vj2w_7D8PPSpypXbb0YfJM,4880
 evalscope/perf/utils/log_utils.py,sha256=NWSK_ITG4yoVx5GMLbIRGDoXSs90s7X3mftdm37Os2U,1666
 evalscope/perf/utils/rich_display.py,sha256=AQmXv1EuA1-IGgco-Jy1NLOmTKv4eBFH2K4QS8OoGVo,8206
-evalscope/report/__init__.py,sha256=DTigCg9fkU_zGNDqIaZy3CWYbrlvODvCxCTVqSx6ano,875
-evalscope/report/combinator.py,sha256=Xzlhs7kwfI6cgs7rngxhvsur0bCJkrM0tAy6isq2VME,3235
+evalscope/report/__init__.py,sha256=xS6eeTgsPdIlIOhzUn-ND77uV34vMVug4PmXHmYAxwM,1080
+evalscope/report/combinator.py,sha256=F7KOClXVh56-XEw3Sb5uxwA6L8ZlH_P4-MOlm3Yp_Cg,5020
 evalscope/report/generator.py,sha256=t2R3WGa4SowTRUPOgITtyTR4QDiJ6i3FH__byDKZU8Y,4959
 evalscope/report/report.py,sha256=lEBD_E_RJiydFTaGFNLIMTFxNrqv8QcLZb_iuUg5HB0,8479
 evalscope/third_party/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
@@ -494,41 +518,14 @@ evalscope/utils/deprecation_utils.py,sha256=aDv3HFNcJFZ7rxNgALQP0-ITO8L23HC_RX-C
 evalscope/utils/function_utils.py,sha256=E-AIzx_PKrZDGl1cBvlvqNvMa8yM2WUJ2wh73PNBXrQ,1887
 evalscope/utils/import_utils.py,sha256=S0WQ3gt4zpwJHjGcyC-604pWWExg3JV7f3wzoOH-tuo,5794
 evalscope/utils/io_utils.py,sha256=79F0p7dFxA84tIVSL_C4piJgeQQtVUfb2R_Xcd8v_cE,11615
-evalscope/utils/json_schema.py,sha256=ZExvQA-SI6SxWBx_hCmuQ2RRqwGKuywy4sTotvd2hH0,8288
+evalscope/utils/json_schema.py,sha256=GVP1m6g4mBrsFmOWOOVnmvl2joOz8gTlGEytLv5qy7s,8451
 evalscope/utils/logger.py,sha256=roFk4Su4aJwsF0s-uYc5-tABnghwYPX3gpkA5QUGzK8,5675
 evalscope/utils/model_utils.py,sha256=mdtYoHhUdfpxUtnS52XZjNdO3uSK4yeIBHT3aDU7s-A,2455
-evalscope/utils/multi_choices.py,sha256=OxBER7amWpoRY0Z-o39rDmCNK6wpr1HQm9mMHpWLgp0,9524
+evalscope/utils/multi_choices.py,sha256=0UJbgr5eXNgitPC79JLcyUU-OXg9BlM-mVk-fWtUSno,9881
 evalscope/utils/url_utils.py,sha256=9HcFt9uZNbOJR3ADUFQ_dBFKziHV6H66Df7HYs1M4Po,1757
-tests/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
-tests/common.py,sha256=BB136KcGaEfdWqMwApa48K0CTSGmOCUZ0FYDqpfYnAA,2423
-tests/test_run_all.py,sha256=YcMTlWoFpvWY8jevWyIf2G_tz8hgDD1cAwSvmyZt96M,429
-tests/utils.py,sha256=Fgm0CU6ilZjCGOfOMJH-Trxy0UIAGbhvy0Ijy_zDGUk,323
-tests/benchmark/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
-tests/benchmark/test_eval.py,sha256=vSAvhiCKxHpjHdGhZn8l0qzPSiG1ZZafz_M06B_a8_Y,13827
-tests/benchmark/test_image_edit.py,sha256=z3z7psMRFynpVgUAFoH--ieeGXzb9cHkrq3tT_sCZo8,2165
-tests/benchmark/test_sandbox.py,sha256=bHyX8ammdn7EsEbN80cIzDNhQZlJD3Ssoj9l4efF7rI,2968
-tests/benchmark/test_t2i.py,sha256=fciaGsOrkOpT4WQlsnmjrqw6qolCzI0DGyWQAJkM-Es,4513
-tests/benchmark/test_vlm.py,sha256=gn0ledf_yPY1IhCyCtiqT_dTVPUVZ3NVPr9yzsC_UZQ,4501
-tests/cli/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
-tests/cli/test_all.py,sha256=1omOXC1lBphBLm0hTf5HNstlF_bwi16dYyr00gvaCTM,7301
-tests/cli/test_collection.py,sha256=lGz3YUS_0gM6_HjQLe26OfBAkHOPOEDWMO-UyP58GN8,4455
-tests/cli/test_custom.py,sha256=9z_N7Re712xI62TqVSTBdzB_iFFEUb55wcWIcGvJb84,9254
-tests/cli/test_reasoning.py,sha256=rU181LLoKbFCpNPFCIZULxEgsJ2PYswel2pP2EsjEmo,2696
-tests/perf/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
-tests/perf/test_perf.py,sha256=ugYNEyU32ctryPFa_6fr8aQYxfHJMymdKnKKEHM9Ajc,6174
-tests/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tests/rag/test_clip_benchmark.py,sha256=qpSLgmHMGcYTnxP7AI__y-ii5_tu_fCSht6p3TBetkA,2650
-tests/rag/test_mteb.py,sha256=fdNQIyUEzE7puPCKw5QhCHTEu7hz-ieHeq1xCWGh6IM,7246
-tests/rag/test_ragas.py,sha256=5qozXvPFIb67T-igJv87ijlOgkPnqgkkBVXu6Ht4D0A,4554
-tests/swift/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
-tests/swift/test_run_swift_eval.py,sha256=YbIhYNoI4kAB-ox-OXAKUifLIXTFqP-xGZicrAgK_V0,5784
-tests/swift/test_run_swift_vlm_eval.py,sha256=RwrKkc1WHEZxetM11cGL81G4faKCn7SYn4VlwL03atI,4934
-tests/swift/test_run_swift_vlm_jugde_eval.py,sha256=UAUtOCQ72xbm8s-sov3cBEpYVDy189wpB-qOL3KoU7M,6053
-tests/vlm/__init__.py,sha256=I_ANdxdcIHpkIzIXc1yKOlWwzb4oY0FwTPq1kYtgzQw,50
-tests/vlm/test_vlmeval.py,sha256=EDQRkYfSyOICUwo_tm3p-puaE_xdFmqOPkrt5etxsqM,3307
-evalscope-1.0.2.dist-info/LICENSE,sha256=K_2M03pN0PxVMyx9IQUKsHGhhDMkw5ryQ02rlMvzj3I,11416
-evalscope-1.0.2.dist-info/METADATA,sha256=vZciS7qNosSJOdwyRSxsCyVqvw8hyqKS84yKjlbxwzw,40305
-evalscope-1.0.2.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-evalscope-1.0.2.dist-info/entry_points.txt,sha256=Qr4oTgGhg_K-iUtKwVH6lWUhFHDUiH9trIqydHGTEug,56
-evalscope-1.0.2.dist-info/top_level.txt,sha256=Yv0iprOqZQ4rfUO-AWJp7Ni6m0Twxny1yvZwO-8hUDM,16
-evalscope-1.0.2.dist-info/RECORD,,
+evalscope-1.1.0.dist-info/LICENSE,sha256=K_2M03pN0PxVMyx9IQUKsHGhhDMkw5ryQ02rlMvzj3I,11416
+evalscope-1.1.0.dist-info/METADATA,sha256=pap4NeCTqw7bec2KqYboFj25zabm1m5rwoiqukX8EO4,39544
+evalscope-1.1.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+evalscope-1.1.0.dist-info/entry_points.txt,sha256=Qr4oTgGhg_K-iUtKwVH6lWUhFHDUiH9trIqydHGTEug,56
+evalscope-1.1.0.dist-info/top_level.txt,sha256=jNR-HMn3TR8Atolq7_4rW8IWVX6GhvYV5_1Y_KbJKlY,10
+evalscope-1.1.0.dist-info/RECORD,,

{evalscope-1.0.2.dist-info → evalscope-1.1.0.dist-info}/top_level.txt RENAMED Viewed

	@@ -1,2 +1 @@
1 1	evalscope
2	- tests

evalscope 1.0.2__py3-none-any.whl → 1.1.0__py3-none-any.whl

Potentially problematic release.

evalscope 1.0.2py3-none-any.whl → 1.1.0py3-none-any.whl