PyPI - mteb - Versions diffs - 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl - Mend

mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (529) hide show

mteb/filter_tasks.py CHANGED Viewed

@@ -1,19 +1,24 @@
 """This script contains functions that are used to get an overview of the MTEB benchmark."""
+from __future__ import annotations
 import logging
-from collections.abc import Sequence
-from typing import overload
+from typing import TYPE_CHECKING, overload
-from mteb.abstasks import (
-    AbsTask,
-)
 from mteb.abstasks.aggregated_task import AbsTaskAggregate
-from mteb.abstasks.task_metadata import TaskCategory, TaskDomain, TaskType
 from mteb.languages import (
     ISO_TO_LANGUAGE,
     ISO_TO_SCRIPT,
 )
-from mteb.types import Modalities
+if TYPE_CHECKING:
+    from collections.abc import Iterable, Sequence
+    from mteb.abstasks import (
+        AbsTask,
+    )
+    from mteb.abstasks.task_metadata import TaskCategory, TaskDomain, TaskType
+    from mteb.types import Modalities
 logger = logging.getLogger(__name__)
@@ -34,14 +39,14 @@ def _check_is_valid_language(lang: str) -> None:
 @overload
 def filter_tasks(
-    tasks: Sequence[AbsTask],
+    tasks: Iterable[AbsTask],
     *,
-    languages: list[str] | None = None,
-    script: list[str] | None = None,
-    domains: list[TaskDomain] | None = None,
-    task_types: list[TaskType] | None = None,  # type: ignore
-    categories: list[TaskCategory] | None = None,
-    modalities: list[Modalities] | None = None,
+    languages: Sequence[str] | None = None,
+    script: Sequence[str] | None = None,
+    domains: Iterable[TaskDomain] | None = None,
+    task_types: Iterable[TaskType] | None = None,
+    categories: Iterable[TaskCategory] | None = None,
+    modalities: Iterable[Modalities] | None = None,
     exclusive_modality_filter: bool = False,
     exclude_superseded: bool = False,
     exclude_aggregate: bool = False,
@@ -51,14 +56,14 @@ def filter_tasks(
 @overload
 def filter_tasks(
-    tasks: Sequence[type[AbsTask]],
+    tasks: Iterable[type[AbsTask]],
     *,
-    languages: list[str] | None = None,
-    script: list[str] | None = None,
-    domains: list[TaskDomain] | None = None,
-    task_types: list[TaskType] | None = None,  # type: ignore
-    categories: list[TaskCategory] | None = None,
-    modalities: list[Modalities] | None = None,
+    languages: Sequence[str] | None = None,
+    script: Sequence[str] | None = None,
+    domains: Iterable[TaskDomain] | None = None,
+    task_types: Iterable[TaskType] | None = None,
+    categories: Iterable[TaskCategory] | None = None,
+    modalities: Iterable[Modalities] | None = None,
     exclusive_modality_filter: bool = False,
     exclude_superseded: bool = False,
     exclude_aggregate: bool = False,
@@ -67,14 +72,14 @@ def filter_tasks(
 def filter_tasks(
-    tasks: Sequence[AbsTask] | Sequence[type[AbsTask]],
+    tasks: Iterable[AbsTask] | Iterable[type[AbsTask]],
     *,
-    languages: list[str] | None = None,
-    script: list[str] | None = None,
-    domains: list[TaskDomain] | None = None,
-    task_types: list[TaskType] | None = None,  # type: ignore
-    categories: list[TaskCategory] | None = None,
-    modalities: list[Modalities] | None = None,
+    languages: Sequence[str] | None = None,
+    script: Sequence[str] | None = None,
+    domains: Iterable[TaskDomain] | None = None,
+    task_types: Iterable[TaskType] | None = None,
+    categories: Iterable[TaskCategory] | None = None,
+    modalities: Iterable[Modalities] | None = None,
     exclusive_modality_filter: bool = False,
     exclude_superseded: bool = False,
     exclude_aggregate: bool = False,
@@ -92,7 +97,6 @@ def filter_tasks(
         task_types: A string specifying the type of task e.g. "Classification" or "Retrieval". If None, all tasks are included.
         categories: A list of task categories these include "t2t" (text to text), "t2i" (text to image). See TaskMetadata for the full list.
         exclude_superseded: A boolean flag to exclude datasets which are superseded by another.
-        eval_splits: A list of evaluation splits to include. If None, all splits are included.
         modalities: A list of modalities to include. If None, all modalities are included.
         exclusive_modality_filter: If True, only keep tasks where _all_ filter modalities are included in the
             task's modalities and ALL task modalities are in filter modalities (exact match).
@@ -113,12 +117,12 @@ def filter_tasks(
     """
     langs_to_keep = None
     if languages:
-        [_check_is_valid_language(lang) for lang in languages]
+        [_check_is_valid_language(lang) for lang in languages]  # type: ignore[func-returns-value]
         langs_to_keep = set(languages)
     script_to_keep = None
     if script:
-        [_check_is_valid_script(s) for s in script]
+        [_check_is_valid_script(s) for s in script]  # type: ignore[func-returns-value]
         script_to_keep = set(script)
     domains_to_keep = None
@@ -178,4 +182,4 @@ def filter_tasks(
         _tasks.append(t)
-    return _tasks
+    return _tasks  # type: ignore[return-value]  # type checker cannot infer the overload return type

mteb/get_tasks.py CHANGED Viewed

@@ -1,19 +1,25 @@
 """This script contains functions that are used to get an overview of the MTEB benchmark."""
+from __future__ import annotations
 import difflib
 import logging
+import warnings
 from collections import Counter, defaultdict
-from collections.abc import Sequence
-from typing import Any
+from typing import TYPE_CHECKING, Any
 import pandas as pd
 from mteb.abstasks import (
     AbsTask,
 )
-from mteb.abstasks.task_metadata import TaskCategory, TaskDomain, TaskType
 from mteb.filter_tasks import filter_tasks
-from mteb.types import Modalities
+if TYPE_CHECKING:
+    from collections.abc import Iterable, Sequence
+    from mteb.abstasks.task_metadata import TaskCategory, TaskDomain, TaskType
+    from mteb.types import Modalities
 logger = logging.getLogger(__name__)
@@ -22,12 +28,11 @@ logger = logging.getLogger(__name__)
 def _gather_tasks() -> tuple[type[AbsTask], ...]:
     import mteb.tasks as tasks
-    tasks = [
+    return tuple(
         t
         for t in tasks.__dict__.values()
         if isinstance(t, type) and issubclass(t, AbsTask)
-    ]
-    return tuple(tasks)
+    )
 def _create_name_to_task_mapping(
@@ -43,7 +48,7 @@ def _create_name_to_task_mapping(
     return metadata_names
-def _create_similar_tasks(tasks: Sequence[type[AbsTask]]) -> dict[str, list[str]]:
+def _create_similar_tasks(tasks: Iterable[type[AbsTask]]) -> dict[str, list[str]]:
     """Create a dictionary of similar tasks.
     Returns:
@@ -194,9 +199,8 @@ class MTEBTasks(tuple[AbsTask]):
             string with a LaTeX table.
         """
         if include_citation_in_name and "name" in properties:
-            properties += ["intext_citation"]
-            df = self.to_dataframe(properties)
-            df["name"] = df["name"] + " " + df["intext_citation"]
+            df = self.to_dataframe(tuple(properties) + ("intext_citation",))
+            df["name"] = df["name"] + " " + df["intext_citation"]  # type: ignore[operator]
             df = df.drop(columns=["intext_citation"])
         else:
             df = self.to_dataframe(properties)
@@ -221,17 +225,17 @@ class MTEBTasks(tuple[AbsTask]):
 def get_tasks(
-    tasks: list[str] | None = None,
+    tasks: Sequence[str] | None = None,
     *,
-    languages: list[str] | None = None,
-    script: list[str] | None = None,
-    domains: list[TaskDomain] | None = None,
-    task_types: list[TaskType] | None = None,  # type: ignore
-    categories: list[TaskCategory] | None = None,
+    languages: Sequence[str] | None = None,
+    script: Sequence[str] | None = None,
+    domains: Sequence[TaskDomain] | None = None,
+    task_types: Sequence[TaskType] | None = None,
+    categories: Sequence[TaskCategory] | None = None,
     exclude_superseded: bool = True,
-    eval_splits: list[str] | None = None,
+    eval_splits: Sequence[str] | None = None,
     exclusive_language_filter: bool = False,
-    modalities: list[Modalities] | None = None,
+    modalities: Sequence[Modalities] | None = None,
     exclusive_modality_filter: bool = False,
     exclude_aggregate: bool = False,
     exclude_private: bool = True,
@@ -287,7 +291,7 @@ def get_tasks(
         ]
         return MTEBTasks(_tasks)
-    _tasks = filter_tasks(
+    tasks_: Sequence[type[AbsTask]] = filter_tasks(
         TASK_LIST,
         languages=languages,
         script=script,
@@ -300,12 +304,12 @@ def get_tasks(
         exclude_aggregate=exclude_aggregate,
         exclude_private=exclude_private,
     )
-    _tasks = [
-        cls().filter_languages(languages, script).filter_eval_splits(eval_splits)
-        for cls in _tasks
-    ]
-    return MTEBTasks(_tasks)
+    return MTEBTasks(
+        [
+            cls().filter_languages(languages, script).filter_eval_splits(eval_splits)
+            for cls in tasks_
+        ]
+    )
 _TASK_RENAMES = {"PersianTextTone": "SynPerTextToneClassification"}
@@ -313,10 +317,10 @@ _TASK_RENAMES = {"PersianTextTone": "SynPerTextToneClassification"}
 def get_task(
     task_name: str,
-    languages: list[str] | None = None,
-    script: list[str] | None = None,
-    eval_splits: list[str] | None = None,
-    hf_subsets: list[str] | None = None,
+    languages: Sequence[str] | None = None,
+    script: Sequence[str] | None = None,
+    eval_splits: Sequence[str] | None = None,
+    hf_subsets: Sequence[str] | None = None,
     exclusive_language_filter: bool = False,
 ) -> AbsTask:
     """Get a task by name.
@@ -340,9 +344,9 @@ def get_task(
     """
     if task_name in _TASK_RENAMES:
         _task_name = _TASK_RENAMES[task_name]
-        logger.warning(
-            f"The task with the given name '{task_name}' has been renamed to '{_task_name}'. To prevent this warning use the new name."
-        )
+        msg = f"The task with the given name '{task_name}' has been renamed to '{_task_name}'. To prevent this warning use the new name."
+        logger.warning(msg)
+        warnings.warn(msg)
     if task_name not in _TASKS_REGISTRY:
         close_matches = difflib.get_close_matches(task_name, _TASKS_REGISTRY.keys())

mteb/languages/language_scripts.py CHANGED Viewed

@@ -1,9 +1,14 @@
-from collections.abc import Iterable
+from __future__ import annotations
 from dataclasses import dataclass
+from typing import TYPE_CHECKING
+from mteb.languages.check_language_code import check_language_code
-from typing_extensions import Self
+if TYPE_CHECKING:
+    from collections.abc import Iterable, Sequence
-from mteb.languages import check_language_code
+    from typing_extensions import Self
 @dataclass
@@ -25,7 +30,9 @@ class LanguageScripts:
     @classmethod
     def from_languages_and_scripts(
-        cls, languages: list[str] | None = None, scripts: list[str] | None = None
+        cls,
+        languages: Sequence[str] | None = None,
+        scripts: Sequence[str] | None = None,
     ) -> Self:
         """Create a LanguageScripts object from lists of languages and scripts.

mteb/leaderboard/app.py CHANGED Viewed

@@ -1,3 +1,5 @@
+from __future__ import annotations
 import itertools
 import json
 import logging
@@ -5,15 +7,14 @@ import tempfile
 import time
 import warnings
 from pathlib import Path
-from typing import Literal
+from typing import TYPE_CHECKING, Literal, get_args
 from urllib.parse import urlencode
 import cachetools
 import gradio as gr
-import pandas as pd
+import pandas as pd  # noqa: TC002 # gradio tries to validate typehints
 import mteb
-from mteb import BenchmarkResults
 from mteb.benchmarks.benchmark import RtebBenchmark
 from mteb.cache import ResultCache
 from mteb.leaderboard.benchmark_selector import (
@@ -29,40 +30,118 @@ from mteb.leaderboard.table import (
     apply_summary_styling_from_benchmark,
 )
 from mteb.leaderboard.text_segments import ACKNOWLEDGEMENT, FAQ
+from mteb.models.model_meta import MODEL_TYPES
+if TYPE_CHECKING:
+    from mteb import BenchmarkResults
 logger = logging.getLogger(__name__)
 LANGUAGE: list[str] = list({l for t in mteb.get_tasks() for l in t.metadata.languages})
+MODEL_TYPE_CHOICES = list(get_args(MODEL_TYPES))
 def _load_results(cache: ResultCache) -> BenchmarkResults:
+    """Load benchmark results using an optimized caching strategy.
+    This function implements a two-tier caching strategy for faster leaderboard startup:
+    1. **Primary Strategy (Fast)**: Download pre-computed cached results from the
+       'cached-data' branch as a compressed JSON file (~2MB vs ~200MB full repo).
+       This avoids the need to clone the entire results repository and provides
+       near-instantaneous loading for most users.
+    2. **Fallback Strategy (Slower)**: If the cached download fails, fall back to
+       the original approach of downloading the full results repository and
+       building the cache from scratch.
+    The cached results file contains pre-aggregated benchmark data that eliminates
+    the need for expensive operations like task selection and revision joining
+    during app startup.
+    Args:
+        cache: ResultCache instance used for both optimized and fallback operations
+    Returns:
+        BenchmarkResults: Complete benchmark results ready for leaderboard display
+    Raises:
+        Various exceptions related to network issues, file I/O, or data validation
+        are logged and may cause fallback to the slower repository-based approach.
+    """
     start_time = time.time()
     results_cache_path = Path(__file__).parent.joinpath("__cached_results.json")
     if not results_cache_path.exists():
-        logger.info("Cached results not found, downloading from remote...")
-        cache.download_from_remote()
-        download_time = time.time() - start_time
-        logger.info(f"Downloaded remote results in {download_time:.2f}s")
-        load_start = time.time()
-        all_model_names = [model_meta.name for model_meta in mteb.get_model_metas()]
-        all_results = cache.load_results(
-            models=all_model_names,
-            only_main_score=True,
-            require_model_meta=False,
-            include_remote=True,
+        # First try to download the cached results file from the cached-data branch
+        # This is faster than cloning the entire results repository
+        logger.info(
+            "Cached results not found, trying to download from cached-data branch..."
         )
-        load_time = time.time() - load_start
-        logger.info(f"Loaded results from cache in {load_time:.2f}s")
-        return all_results
-    else:
-        logger.info("Loading cached results from disk...")
+        try:
+            # Use ResultCache's optimized download method
+            # Default saves to mteb/leaderboard/__cached_results.json
+            results_cache_path = cache._download_cached_results_from_branch()
+            download_time = time.time() - start_time
+            logger.info(
+                f"Downloaded cached results from cached-data branch in {download_time:.2f}s"
+            )
+        except Exception as e:
+            logger.error(
+                f"Failed to download from cached-data branch: {type(e).__name__}: {e}"
+            )
+            logger.info("Falling back to downloading full remote repository...")
+            # Fall back to the original approach: clone the full repo
+            cache.download_from_remote()
+            download_time = time.time() - start_time
+            logger.info(f"Downloaded remote results in {download_time:.2f}s")
+            load_start = time.time()
+            all_model_names = [model_meta.name for model_meta in mteb.get_model_metas()]
+            all_results = cache.load_results(
+                models=all_model_names,
+                only_main_score=True,
+                require_model_meta=False,
+                include_remote=True,
+            )
+            load_time = time.time() - load_start
+            logger.info(f"Loaded results from cache in {load_time:.2f}s")
+            return all_results
+    # Load the cached results file (either pre-existing or just downloaded)
+    logger.info("Loading cached results from disk...")
+    try:
+        logger.info(f"Opening file: {results_cache_path}")
+        file_size = results_cache_path.stat().st_size
+        logger.info(f"File exists, size: {file_size} bytes")
         with results_cache_path.open() as cache_file:
-            results = mteb.BenchmarkResults.from_validated(**json.load(cache_file))
-        total_time = time.time() - start_time
-        logger.info(f"Loaded cached results in {total_time:.2f}s")
-        return results
+            logger.info("File opened successfully, attempting JSON parse...")
+            json_data = json.load(cache_file)
+            logger.info(
+                f"JSON parsed successfully, keys: {list(json_data.keys()) if isinstance(json_data, dict) else 'not a dict'}"
+            )
+        logger.info("Attempting BenchmarkResults.from_validated...")
+        results = mteb.BenchmarkResults.from_validated(**json_data)
+        logger.info("BenchmarkResults.from_validated successful")
+    except Exception as e:
+        # TODO: Handle the case when we fail to load cached results from disk.
+        logger.error(
+            f"Failed to load cached results from disk: {type(e).__name__}: {e}"
+        )
+        raise
+    total_time = time.time() - start_time
+    logger.info(f"Loaded cached results in {total_time:.2f}s")
+    return results
 def _produce_benchmark_link(benchmark_name: str, request: gr.Request) -> str:
@@ -169,7 +248,7 @@ def _update_task_info(task_names: str) -> gr.DataFrame:
     df = df.drop(columns="reference")
     return gr.DataFrame(
         df,
-        datatype=["markdown"] + ["str"] * (len(df.columns) - 1),  # type: ignore
+        datatype=["markdown"] + ["str"] * (len(df.columns) - 1),
         buttons=["copy", "fullscreen"],
         show_search="filter",
     )
@@ -187,6 +266,7 @@ def _filter_models(
     instructions: bool | None,
     max_model_size: int,
     zero_shot_setting: Literal["only_zero_shot", "allow_all", "remove_unknown"],
+    model_types: list[str] | None,
 ):
     lower, upper = 0, max_model_size
     # Setting to None, when the user doesn't specify anything
@@ -205,6 +285,7 @@ def _filter_models(
         use_instructions=instructions,
         frameworks=compatibility,
         n_parameters_range=(lower, upper),
+        model_types=model_types,
     )
     models_to_keep = set()
@@ -269,6 +350,7 @@ def _cache_on_benchmark_select(benchmark_name, all_benchmark_results):
         instructions=None,
         max_model_size=MAX_MODEL_SIZE,
         zero_shot_setting="allow_all",
+        model_types=MODEL_TYPE_CHOICES,
     )
     # Sort to ensure consistency with update_models
     initial_models = sorted(initial_models)
@@ -387,6 +469,7 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
         instructions=None,
         max_model_size=MAX_MODEL_SIZE,
         zero_shot_setting="allow_all",
+        model_types=MODEL_TYPE_CHOICES,
     )
     default_filtered_scores = [
         entry for entry in default_scores if entry["model_name"] in filtered_models
@@ -467,7 +550,10 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
     logger.info("Step 7/7: Building Gradio interface and callbacks...")
     interface_start = time.time()
-    with gr.Blocks(fill_width=True) as demo:
+    with gr.Blocks(
+        title="MTEB Leaderboard",
+        fill_width=True,
+    ) as demo:
         with gr.Sidebar(
             position="left",
             label="Benchmark Selection and Customization",
@@ -583,6 +669,12 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
                                     label="Model Parameters",
                                     interactive=True,
                                 )
+                            with gr.Column():
+                                model_type_select = gr.CheckboxGroup(
+                                    MODEL_TYPE_CHOICES,
+                                    value=MODEL_TYPE_CHOICES,
+                                    label="Model Type",
+                                )
         with gr.Tab("Summary"):
             summary_table.render()
@@ -755,7 +847,8 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
             compatibility,
             instructions,
             max_model_size,
-            zero_shot: hash(
+            zero_shot,
+            model_type_select: hash(
                 (
                     id(scores),
                     hash(tuple(tasks)),
@@ -764,6 +857,7 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
                     hash(instructions),
                     hash(max_model_size),
                     hash(zero_shot),
+                    hash(tuple(model_type_select)),
                 )
             ),
         )
@@ -775,6 +869,7 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
             instructions: bool | None,
             max_model_size: int,
             zero_shot: Literal["allow_all", "remove_unknown", "only_zero_shot"],
+            model_type_select: list[str],
         ):
             start_time = time.time()
             model_names = list({entry["model_name"] for entry in scores})
@@ -786,6 +881,7 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
                 instructions,
                 max_model_size,
                 zero_shot_setting=zero_shot,
+                model_types=model_type_select,
             )
             elapsed = time.time() - start_time
             logger.debug(f"update_models callback: {elapsed}s")
@@ -803,6 +899,7 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
                 instructions,
                 max_model_size,
                 zero_shot,
+                model_type_select,
             ],
             outputs=[models],
         )
@@ -817,6 +914,7 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
                 instructions,
                 max_model_size,
                 zero_shot,
+                model_type_select,
             ],
             outputs=[models],
         )
@@ -830,6 +928,7 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
                 instructions,
                 max_model_size,
                 zero_shot,
+                model_type_select,
             ],
             outputs=[models],
         )
@@ -843,6 +942,7 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
                 instructions,
                 max_model_size,
                 zero_shot,
+                model_type_select,
             ],
             outputs=[models],
         )
@@ -856,6 +956,7 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
                 instructions,
                 max_model_size,
                 zero_shot,
+                model_type_select,
             ],
             outputs=[models],
         )
@@ -869,6 +970,7 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
                 instructions,
                 max_model_size,
                 zero_shot,
+                model_type_select,
             ],
             outputs=[models],
         )
@@ -882,6 +984,21 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
                 instructions,
                 max_model_size,
                 zero_shot,
+                model_type_select,
+            ],
+            outputs=[models],
+        )
+        model_type_select.change(
+            update_models,
+            inputs=[
+                scores,
+                task_select,
+                availability,
+                compatibility,
+                instructions,
+                max_model_size,
+                zero_shot,
+                model_type_select,
             ],
             outputs=[models],
         )
@@ -1023,16 +1140,34 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
 if __name__ == "__main__":
-    logging.getLogger("mteb.load_results.task_results").setLevel(
-        logging.ERROR
-    )  # Warnings related to task split
-    logging.getLogger("mteb.model_meta").setLevel(
-        logging.ERROR
-    )  # Warning related to model metadata (fetch_from_hf=False)
-    logging.getLogger("mteb.load_results.benchmark_results").setLevel(
-        logging.ERROR
-    )  # Warning related to model metadata (fetch_from_hf=False)
+    import os
+    # Add process ID to logging for multiprocessing debugging
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s - PID:%(process)d - %(name)s - %(levelname)s - %(message)s",
+        force=True,  # Override any existing handlers
+    )
+    # Flush log handlers immediately (helpful for multiprocessing)
+    for handler in logging.root.handlers:
+        handler.flush()
+    logger.info(f"Starting leaderboard app in process {os.getpid()}")
+    # Suppress specific WARNING messages while keeping INFO level for the app
+    logging.getLogger("mteb.results.task_result").setLevel(logging.ERROR)
+    logging.getLogger("mteb.models.model_meta").setLevel(logging.ERROR)
+    logging.getLogger("mteb.results.benchmark_results").setLevel(logging.ERROR)
     warnings.filterwarnings("ignore", message="Couldn't get scores for .* due to .*")
+    warnings.filterwarnings("ignore", message="Could not get source model: .*")
+    warnings.filterwarnings(
+        "ignore", message="No scores data available. Returning empty DataFrame."
+    )
+    warnings.filterwarnings("ignore", message="Main score .* not found in scores")
+    warnings.filterwarnings("ignore", message=".*: Missing subsets .* for split .*")
+    warnings.filterwarnings("ignore", message=".*: Missing splits .*")
     app = get_leaderboard_app()

mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl