mteb 2.3.1__py3-none-any.whl → 2.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mteb/benchmarks/_create_table.py CHANGED
@@ -1,4 +1,3 @@
-import math
 import re
 from collections import defaultdict
 
@@ -32,26 +31,18 @@ def _split_on_capital(s: str) -> str:
     return " ".join(re.findall(r"[A-Z]?[a-z]+|[A-Z]+(?=[A-Z]|$)", s))
 
 
-def _format_n_parameters(n_parameters) -> str:
-    if (n_parameters is None) or (not int(n_parameters)):
-        return "Unknown"
-    n_thousand = int(n_parameters // 1e3)
-    if n_thousand < 1:
-        return str(int(n_parameters))
-    n_zeros = math.log10(n_thousand)
-    if n_zeros >= 6:
-        return str(n_thousand // (10**6)) + "B"
-    if n_zeros >= 3:
-        return str(n_thousand // (10**3)) + "M"
-    return str(n_thousand) + "K"
+def _format_n_parameters(n_parameters) -> float | None:
+    """Format n_parameters to be in billions with decimals down to 1 million. I.e. 7M -> 0.007B, 1.5B -> 1.5B, None -> None"""
+    if n_parameters:
+        n_parameters = float(n_parameters)
+        return round(n_parameters / 1e9, 3)
+    return None
 
 
-def _format_max_tokens(max_tokens: float | None) -> str:
-    if max_tokens is None:
-        return "Unknown"
-    if max_tokens == np.inf:
-        return "Infinite"
-    return str(int(max_tokens))
+def _format_max_tokens(max_tokens: float | None) -> float | None:
+    if max_tokens is None or max_tokens == np.inf:
+        return None
+    return float(max_tokens)
 
 
 def _get_means_per_types(per_task: pd.DataFrame):
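Note: the tables now carry parameter counts and token limits as plain numbers (with `None` for unknown) instead of pre-rendered strings like `"7B"` or `"Infinite"`, which keeps the columns sortable; string rendering moves into `table.py` below. A minimal sketch of the new behavior (the two helpers reimplemented standalone, not imported from mteb):

```python
import numpy as np

def _format_n_parameters(n_parameters) -> float | None:
    """7_000_000 -> 0.007 (billions); falsy/None -> None."""
    if n_parameters:
        return round(float(n_parameters) / 1e9, 3)
    return None

def _format_max_tokens(max_tokens: float | None) -> float | None:
    """None and np.inf both collapse to None instead of 'Unknown'/'Infinite'."""
    if max_tokens is None or max_tokens == np.inf:
        return None
    return float(max_tokens)

assert _format_n_parameters(7_000_000) == 0.007    # 7M -> 0.007B
assert _format_n_parameters(1_500_000_000) == 1.5  # 1.5B -> 1.5
assert _format_n_parameters(None) is None
assert _format_max_tokens(np.inf) is None
```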
@@ -144,18 +135,18 @@ def _create_summary_table_from_benchmark_results(
     joint_table.insert(
         1,
         "Embedding Dimensions",
-        model_metas.map(lambda m: str(int(m.embed_dim)) if m.embed_dim else "Unknown"),
+        model_metas.map(lambda m: int(m.embed_dim) if m.embed_dim else None),
     )
     joint_table.insert(
         1,
-        "Number of Parameters",
+        "Number of Parameters (B)",
         model_metas.map(lambda m: _format_n_parameters(m.n_parameters)),
     )
     joint_table.insert(
         1,
         "Memory Usage (MB)",
         model_metas.map(
-            lambda m: str(int(m.memory_usage_mb)) if m.memory_usage_mb else "Unknown"
+            lambda m: int(m.memory_usage_mb) if m.memory_usage_mb else None
        ),
    )
 
@@ -323,18 +314,18 @@ def _create_summary_table_mean_public_private(
     joint_table.insert(
         1,
         "Embedding Dimensions",
-        model_metas.map(lambda m: str(int(m.embed_dim)) if m.embed_dim else "Unknown"),
+        model_metas.map(lambda m: int(m.embed_dim) if m.embed_dim else None),
     )
     joint_table.insert(
         1,
-        "Number of Parameters",
+        "Number of Parameters (B)",
         model_metas.map(lambda m: _format_n_parameters(m.n_parameters)),
     )
     joint_table.insert(
         1,
         "Memory Usage (MB)",
         model_metas.map(
-            lambda m: str(int(m.memory_usage_mb)) if m.memory_usage_mb else "Unknown"
+            lambda m: int(m.memory_usage_mb) if m.memory_usage_mb else None
        ),
    )
 
@@ -445,18 +436,18 @@ def _create_summary_table_mean_subset(
     joint_table.insert(
         1,
         "Embedding Dimensions",
-        model_metas.map(lambda m: str(int(m.embed_dim)) if m.embed_dim else "Unknown"),
+        model_metas.map(lambda m: int(m.embed_dim) if m.embed_dim else None),
     )
     joint_table.insert(
         1,
-        "Number of Parameters",
+        "Number of Parameters (B)",
         model_metas.map(lambda m: _format_n_parameters(m.n_parameters)),
     )
     joint_table.insert(
         1,
         "Memory Usage (MB)",
         model_metas.map(
-            lambda m: str(int(m.memory_usage_mb)) if m.memory_usage_mb else "Unknown"
+            lambda m: int(m.memory_usage_mb) if m.memory_usage_mb else None
        ),
    )
 
@@ -558,25 +549,23 @@ def _create_summary_table_mean_task_type(
 
     # Insert model metadata columns
     joint_table.insert(
-        1,
-        "Max Tokens",
-        model_metas.map(lambda m: _format_max_tokens(m.max_tokens)),
+        1, "Max Tokens", model_metas.map(lambda m: _format_max_tokens(m.max_tokens))
     )
     joint_table.insert(
         1,
         "Embedding Dimensions",
-        model_metas.map(lambda m: str(int(m.embed_dim)) if m.embed_dim else "Unknown"),
+        model_metas.map(lambda m: int(m.embed_dim) if m.embed_dim else None),
     )
     joint_table.insert(
         1,
-        "Number of Parameters",
+        "Number of Parameters (B)",
         model_metas.map(lambda m: _format_n_parameters(m.n_parameters)),
     )
     joint_table.insert(
         1,
         "Memory Usage (MB)",
         model_metas.map(
-            lambda m: str(int(m.memory_usage_mb)) if m.memory_usage_mb else "Unknown"
+            lambda m: int(m.memory_usage_mb) if m.memory_usage_mb else None
        ),
    )
 
mteb/benchmarks/benchmarks/__init__.py CHANGED
@@ -43,6 +43,7 @@ from mteb.benchmarks.benchmarks.benchmarks import (
     VN_MTEB,
     CoIR,
     MTEB_code,
+    MTEB_MAIN_RU_v1_1,
     MTEB_multilingual_v1,
     MTEB_multilingual_v2,
     RAR_b,
@@ -113,6 +114,7 @@ __all__ = [
     "VISUAL_DOCUMENT_RETRIEVAL",
     "VN_MTEB",
     "CoIR",
+    "MTEB_MAIN_RU_v1_1",
     "MTEB_code",
     "MTEB_multilingual_v1",
     "MTEB_multilingual_v2",
mteb/benchmarks/benchmarks/benchmarks.py CHANGED
@@ -185,7 +185,7 @@ We recommend that you use [MTEB(eng, v2)](http://mteb-leaderboard.hf.space/?benc
 
 MTEB_MAIN_RU = Benchmark(
     name="MTEB(rus, v1)",
-    display_name="Russian",
+    display_name="Russian legacy",
     icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/ru.svg",
     tasks=MTEBTasks(
         get_tasks(
@@ -240,6 +240,67 @@ MTEB_MAIN_RU = Benchmark(
     year = {2024},
 }
 """,
+    contacts=["Samoed", "artemsnegirev", "Drozhzhinastya"],
+)
+
+MTEB_MAIN_RU_v1_1 = Benchmark(
+    name="MTEB(rus, v1.1)",
+    display_name="Russian",
+    icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/ru.svg",
+    tasks=MTEBTasks(
+        get_tasks(
+            languages=["rus"],
+            tasks=[
+                # Classification
+                "GeoreviewClassification",
+                "HeadlineClassification",
+                "InappropriatenessClassification",
+                "KinopoiskClassification",
+                "MassiveIntentClassification",
+                "MassiveScenarioClassification",
+                "RuReviewsClassification",
+                "RuSciBenchGRNTIClassification",
+                "RuSciBenchOECDClassification",
+                # Clustering
+                "GeoreviewClusteringP2P",
+                "RuSciBenchGRNTIClusteringP2P",
+                "RuSciBenchOECDClusteringP2P",
+                # MultiLabelClassification
+                "CEDRClassification",
+                "SensitiveTopicsClassification",
+                # PairClassification
+                "TERRa",
+                # Reranking
+                "MIRACLReranking",
+                "RuBQReranking",
+                # Retrieval
+                "MIRACLRetrievalHardNegatives.v2",
+                "RiaNewsRetrievalHardNegatives.v2",
+                "RuBQRetrieval",
+                # STS
+                "RUParaPhraserSTS",
+                "STS22",
+            ],
+        )
+        + get_tasks(
+            tasks=["RuSTSBenchmarkSTS"],
+            eval_splits=["test"],
+        )
+    ),
+    description="A Russian version of the Massive Text Embedding Benchmark covering the task categories of classification, clustering, reranking, pair classification, retrieval, and semantic similarity. In v1.1, MIRACLRetrieval and RiaNewsRetrieval were replaced with their HardNegatives variants for improved time-optimization measurement. MIRACLRetrievalHardNegatives and RiaNewsRetrievalHardNegatives are used in their updated versions (v2), both of which include improved default prompts.",
+    reference="https://aclanthology.org/2023.eacl-main.148/",
+    citation=r"""
+@misc{snegirev2024russianfocusedembeddersexplorationrumteb,
+    archiveprefix = {arXiv},
+    author = {Artem Snegirev and Maria Tikhonova and Anna Maksimova and Alena Fenogenova and Alexander Abramov},
+    eprint = {2408.12503},
+    primaryclass = {cs.CL},
+    title = {The Russian-focused embedders' exploration: ruMTEB benchmark and Russian embedding model design},
+    url = {https://arxiv.org/abs/2408.12503},
+    year = {2024},
+}
+""",
+    contacts=["Samoed", "artemsnegirev", "Drozhzhinastya"],
 )
 
 
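Note: once this version is installed, the new benchmark resolves by name like any other; a quick sketch (tasks are loaded lazily when the benchmark is actually run):

```python
import mteb

# v1.1 swaps MIRACLRetrieval/RiaNewsRetrieval for their HardNegatives v2
# variants; the v1 benchmark stays available as "Russian legacy".
benchmark = mteb.get_benchmark("MTEB(rus, v1.1)")
print(benchmark.display_name)  # "Russian"
print(len(benchmark.tasks))    # number of tasks in the benchmark
```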
mteb/leaderboard/app.py CHANGED
@@ -5,7 +5,7 @@ import tempfile
 import time
 import warnings
 from pathlib import Path
-from typing import Literal, get_args
+from typing import Literal
 from urllib.parse import urlencode
 
 import cachetools
@@ -14,7 +14,6 @@ import pandas as pd
 
 import mteb
 from mteb import BenchmarkResults
-from mteb.abstasks.task_metadata import TaskDomain, TaskType
 from mteb.benchmarks.benchmark import RtebBenchmark
 from mteb.cache import ResultCache
 from mteb.leaderboard.benchmark_selector import (
@@ -29,7 +28,6 @@ from mteb.leaderboard.table import (
     apply_summary_styling_from_benchmark,
 )
 from mteb.leaderboard.text_segments import ACKNOWLEDGEMENT, FAQ
-from mteb.types import Modalities
 
 logger = logging.getLogger(__name__)
 
@@ -139,7 +137,10 @@ def _update_task_info(task_names: str) -> gr.DataFrame:
     df["languages"] = df["languages"].map(_format_list)
     df = df.sort_values("name")
     df["domains"] = df["domains"].map(_format_list)
-    df["name"] = f'<a href="{df["reference"]}" target="_blank">{df["name"]}</a>'
+    df["name"] = df.apply(
+        lambda row: f'<a href="{row["reference"]}" target="_blank">{row["name"]}</a>',
+        axis=1,
+    )
     df["modalities"] = df["modalities"].map(_format_list)
     df = df.rename(
         columns={
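Note: this is a genuine bug fix rather than style. Interpolating a whole pandas Series into an f-string evaluates once and embeds the Series' repr, instead of producing one link per row. A minimal illustration (toy DataFrame, not mteb code):

```python
import pandas as pd

df = pd.DataFrame({"name": ["TaskA", "TaskB"], "reference": ["https://a", "https://b"]})

# Broken: the f-string is evaluated once against the whole columns.
broken = f'<a href="{df["reference"]}">{df["name"]}</a>'  # one big string with Series reprs

# Fixed: evaluate per row.
df["name"] = df.apply(
    lambda row: f'<a href="{row["reference"]}" target="_blank">{row["name"]}</a>',
    axis=1,
)
print(df["name"].tolist())
# ['<a href="https://a" target="_blank">TaskA</a>',
#  '<a href="https://b" target="_blank">TaskB</a>']
```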
@@ -155,9 +156,8 @@ def _update_task_info(task_names: str) -> gr.DataFrame:
     df = df.drop(columns="reference")
     return gr.DataFrame(
         df,
-        datatype=["markdown"] + ["str"] * (len(df.columns) - 1),
-        show_copy_button=True,
-        show_fullscreen_button=True,
+        datatype=["markdown"] + ["str"] * (len(df.columns) - 1),  # type: ignore
+        buttons=["copy", "fullscreen"],
         show_search="filter",
     )
 
@@ -215,6 +215,110 @@ def _should_show_zero_shot_filter(benchmark_name: str) -> bool:
     return True
 
 
+@cachetools.cached(
+    cache={},
+    key=lambda benchmark_name, all_benchmark_results: hash(benchmark_name),
+)
+def _cache_on_benchmark_select(benchmark_name, all_benchmark_results):
+    start_time = time.time()
+    benchmark = mteb.get_benchmark(benchmark_name)
+    languages = [task.languages for task in benchmark.tasks if task.languages]
+    languages = set(itertools.chain.from_iterable(languages))
+    languages = sorted(languages)
+    domains = [
+        task.metadata.domains for task in benchmark.tasks if task.metadata.domains
+    ]
+    domains = set(itertools.chain.from_iterable(domains))
+    types = {task.metadata.type for task in benchmark.tasks if task.metadata.type}
+    modalities = set()
+    for task in benchmark.tasks:
+        modalities.update(task.metadata.modalities)
+    languages, domains, types, modalities = (
+        sorted(languages),
+        sorted(domains),
+        sorted(types),
+        sorted(modalities),
+    )
+    elapsed = time.time() - start_time
+    benchmark_results = all_benchmark_results[benchmark_name]
+    scores = benchmark_results._get_scores(format="long")
+    logger.debug(f"on_benchmark_select callback: {elapsed}s")
+    show_zero_shot = _should_show_zero_shot_filter(benchmark_name)
+
+    # Calculate initial models for this benchmark to avoid race conditions
+    benchmark_tasks = sorted([task.metadata.name for task in benchmark.tasks])
+    all_models_in_scores = list({entry["model_name"] for entry in scores})
+    initial_models = _filter_models(
+        all_models_in_scores,
+        benchmark_tasks,
+        availability=None,
+        compatibility=[],
+        instructions=None,
+        max_model_size=MAX_MODEL_SIZE,
+        zero_shot_setting="allow_all",
+    )
+    # Sort to ensure consistency with update_models
+    initial_models = sorted(initial_models)
+
+    return (
+        languages,
+        domains,
+        types,
+        modalities,
+        benchmark_tasks,
+        scores,
+        show_zero_shot,
+        initial_models,
+    )
+
+
+@cachetools.cached(
+    cache={},
+    key=lambda benchmark_name,
+    type_select,
+    domain_select,
+    lang_select,
+    modality_select: hash(
+        (
+            hash(benchmark_name),
+            hash(tuple(type_select)),
+            hash(tuple(domain_select)),
+            hash(tuple(lang_select)),
+            hash(tuple(modality_select)),
+        )
+    ),
+)
+def _cache_update_task_list(
+    benchmark_name, type_select, domain_select, lang_select, modality_select
+):
+    if not len(lang_select):
+        return []
+    start_time = time.time()
+    benchmark_tasks = []
+    tasks_to_keep = []
+    for task in mteb.get_benchmark(benchmark_name).tasks:
+        benchmark_tasks.append(task.metadata.name)
+        if task.metadata.type not in type_select:
+            continue
+        if task.metadata.domains and not (
+            set(task.metadata.domains) & set(domain_select)
+        ):
+            continue
+        if task.languages and not (set(task.languages) & set(lang_select)):
+            continue
+        if task.metadata.modalities and not (
+            set(task.metadata.modalities) & set(modality_select)
+        ):
+            continue
+        tasks_to_keep.append(task.metadata.name)
+    benchmark_tasks.sort()
+    tasks_to_keep.sort()
+    elapsed = time.time() - start_time
+    logger.debug(f"update_task_list callback: {elapsed}s")
+
+    return benchmark_tasks, tasks_to_keep
+
+
 def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
     """Returns a Gradio Blocks app for the MTEB leaderboard."""
     logger.info("Loading all benchmark results")
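Note: hoisting the `@cachetools.cached` helpers to module level means the caches are created once at import time and shared across all sessions, instead of being rebuilt inside each Blocks closure. The underlying pattern, reduced to its essentials (illustrative function, not from mteb):

```python
import cachetools

@cachetools.cached(
    cache={},  # module-level dict: lives for the whole process, shared by all callers
    key=lambda name, big_unhashable: hash(name),  # key only on the cheap argument
)
def expensive_lookup(name, big_unhashable):
    # big_unhashable (e.g. the results mapping above) is deliberately excluded
    # from the cache key, so it never needs to be hashable.
    return big_unhashable[name]

data = {"a": 1}
assert expensive_lookup("a", data) == 1  # computed once
assert expensive_lookup("a", {}) == 1    # served from cache; second arg ignored
```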
@@ -227,6 +331,7 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
         benchmark.name: all_results.select_tasks(benchmark.tasks).join_revisions()
         for benchmark in benchmarks
     }
+
     default_benchmark = mteb.get_benchmark(DEFAULT_BENCHMARK_NAME)
     default_results = all_benchmark_results[default_benchmark.name]
     logger.info("Benchmark results loaded")
@@ -257,55 +362,48 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
         default_benchmark, filtered_benchmark_results
     )
 
-    lang_select = gr.Dropdown(
-        LANGUAGE,
+    lang_select = gr.CheckboxGroup(
+        sorted(default_results.languages),
         value=sorted(default_results.languages),
-        allow_custom_value=True,
-        multiselect=True,
+        show_label=True,
+        show_select_all=True,
         label="Language",
         info="Select languages to include.",
     )
-    type_select = gr.Dropdown(
-        sorted(get_args(TaskType)),
+    type_select = gr.CheckboxGroup(
+        sorted(default_results.task_types),
         value=sorted(default_results.task_types),
-        multiselect=True,
+        show_label=True,
+        show_select_all=True,
         label="Task Type",
         info="Select task types to include.",
     )
-    domain_select = gr.Dropdown(
-        sorted(get_args(TaskDomain)),
+    domain_select = gr.CheckboxGroup(
+        sorted(default_results.domains),
         value=sorted(default_results.domains),
-        multiselect=True,
+        show_label=True,
+        show_select_all=True,
        label="Domain",
        info="Select domains to include.",
     )
-    task_select = gr.Dropdown(
-        sorted(all_results.task_names),
+    task_select = gr.CheckboxGroup(
+        sorted(default_results.task_names),
         value=sorted(default_results.task_names),
-        allow_custom_value=True,
-        multiselect=True,
+        show_label=True,
+        show_select_all=True,
         label="Task",
         info="Select specific tasks to include",
     )
-    modality_select = gr.Dropdown(
-        sorted(get_args(Modalities)),
+    modality_select = gr.CheckboxGroup(
+        sorted(default_results.modalities),
         value=sorted(default_results.modalities),
-        multiselect=True,
+        show_label=True,
+        show_select_all=True,
         label="Modality",
         info="Select modalities to include.",
     )
 
-    head = """
-    <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css" rel="stylesheet">
-    """
-
-    with gr.Blocks(
-        fill_width=True,
-        theme=gr.themes.Soft(
-            font=[gr.themes.GoogleFont("Roboto Mono"), "Arial", "sans-serif"],
-        ),
-        head=head,
-    ) as demo:
+    with gr.Blocks(fill_width=True) as demo:
         with gr.Sidebar(
             position="left",
             label="Benchmark Selection and Customization",
@@ -437,9 +535,6 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
 
             with gr.Tab("Performance per Model Size") as plot_tab:
                 plot = gr.Plot(_performance_size_plot, inputs=[summary_table])
-                gr.Markdown(
-                    "*We only display TOP 5 models that have been run on all tasks in the benchmark*"
-                )
                 plot_tab.select(
                     _performance_size_plot, inputs=[summary_table], outputs=[plot]
                 )
@@ -465,62 +560,25 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
         # This sets the benchmark from the URL query parameters
         demo.load(_set_benchmark_on_load, inputs=[], outputs=[benchmark_select])
 
-        @cachetools.cached(
-            cache={},
-            key=lambda benchmark_name: hash(benchmark_name),
-        )
         def on_benchmark_select(benchmark_name):
-            start_time = time.time()
-            benchmark = mteb.get_benchmark(benchmark_name)
-            languages = [task.languages for task in benchmark.tasks if task.languages]
-            languages = set(itertools.chain.from_iterable(languages))
-            languages = sorted(languages)
-            domains = [
-                task.metadata.domains
-                for task in benchmark.tasks
-                if task.metadata.domains
-            ]
-            domains = set(itertools.chain.from_iterable(domains))
-            types = {
-                task.metadata.type for task in benchmark.tasks if task.metadata.type
-            }
-            modalities = set()
-            for task in benchmark.tasks:
-                modalities.update(task.metadata.modalities)
-            languages, domains, types, modalities = (
-                sorted(languages),
-                sorted(domains),
-                sorted(types),
-                sorted(modalities),
-            )
-            elapsed = time.time() - start_time
-            benchmark_results = all_benchmark_results[benchmark_name]
-            scores = benchmark_results._get_scores(format="long")
-            logger.debug(f"on_benchmark_select callback: {elapsed}s")
-            show_zero_shot = _should_show_zero_shot_filter(benchmark_name)
-
-            # Calculate initial models for this benchmark to avoid race conditions
-            benchmark_tasks = sorted([task.metadata.name for task in benchmark.tasks])
-            all_models_in_scores = list({entry["model_name"] for entry in scores})
-            initial_models = _filter_models(
-                all_models_in_scores,
-                benchmark_tasks,
-                availability=None,
-                compatibility=[],
-                instructions=None,
-                max_model_size=MAX_MODEL_SIZE,
-                zero_shot_setting="allow_all",
-            )
-            # Sort to ensure consistency with update_models
-            initial_models = sorted(initial_models)
-
-            return (
+            (
                 languages,
                 domains,
                 types,
                 modalities,
                 benchmark_tasks,
                 scores,
+                show_zero_shot,
+                initial_models,
+            ) = _cache_on_benchmark_select(benchmark_name, all_benchmark_results)
+
+            return (
+                gr.update(choices=languages, value=languages),
+                gr.update(choices=domains, value=domains),
+                gr.update(choices=types, value=types),
+                gr.update(choices=modalities, value=modalities),
+                gr.update(choices=benchmark_tasks, value=benchmark_tasks),
+                scores,
                 gr.update(visible=show_zero_shot),
                 initial_models,
             )
@@ -562,48 +620,13 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
             outputs=[scores],
         )
 
-        @cachetools.cached(
-            cache={},
-            key=lambda benchmark_name,
-            type_select,
-            domain_select,
-            lang_select,
-            modality_select: hash(
-                (
-                    hash(benchmark_name),
-                    hash(tuple(type_select)),
-                    hash(tuple(domain_select)),
-                    hash(tuple(lang_select)),
-                    hash(tuple(modality_select)),
-                )
-            ),
-        )
         def update_task_list(
             benchmark_name, type_select, domain_select, lang_select, modality_select
         ):
-            if not len(lang_select):
-                return []
-            start_time = time.time()
-            tasks_to_keep = []
-            for task in mteb.get_benchmark(benchmark_name).tasks:
-                if task.metadata.type not in type_select:
-                    continue
-                if task.metadata.domains is not None and not (
-                    set(task.metadata.domains) & set(domain_select)
-                ):
-                    continue
-                if task.languages is not None and not (
-                    set(task.languages) & set(lang_select)
-                ):
-                    continue
-                if task.metadata.modalities and not (
-                    set(task.metadata.modalities) & set(modality_select)
-                ):
-                    continue
-                tasks_to_keep.append(task.metadata.name)
-            elapsed = time.time() - start_time
-            logger.debug(f"update_task_list callback: {elapsed}s")
-            return sorted(tasks_to_keep)
+            benchmark_tasks, tasks_to_keep = _cache_update_task_list(
+                benchmark_name, type_select, domain_select, lang_select, modality_select
+            )
+            return gr.update(choices=benchmark_tasks, value=tasks_to_keep)
 
         type_select.input(
             update_task_list,
@@ -913,4 +936,15 @@ if __name__ == "__main__":
     warnings.filterwarnings("ignore", message="Couldn't get scores for .* due to .*")
 
     app = get_leaderboard_app()
-    app.launch(server_name="0.0.0.0", server_port=7860)
+
+    head = """
+    <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css" rel="stylesheet">
+    """
+    app.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        theme=gr.themes.Soft(
+            font=[gr.themes.GoogleFont("Roboto Mono"), "Arial", "sans-serif"],
+        ),
+        head=head,
+    )
mteb/leaderboard/benchmark_selector.py CHANGED
@@ -75,14 +75,17 @@ GP_BENCHMARK_ENTRIES = [
                 "MTEB(kor, v1)",
                 "MTEB(nld, v1)",
                 "MTEB(pol, v1)",
-                "MTEB(rus, v1)",
+                "MTEB(rus, v1.1)",
                 "MTEB(fas, v2)",
                 "VN-MTEB (vie, v1)",
             ]
         )
         + [
             MenuEntry(
-                "Other", mteb.get_benchmarks(["MTEB(eng, v1)", "MTEB(fas, v1)"])
+                "Other",
+                mteb.get_benchmarks(
+                    ["MTEB(eng, v1)", "MTEB(fas, v1)", "MTEB(rus, v1)"]
+                ),
             )
         ],
     ),
mteb/leaderboard/figures.py CHANGED
@@ -1,3 +1,4 @@
+import logging
 from typing import get_args
 
 import numpy as np
@@ -7,6 +8,8 @@ import plotly.graph_objects as go
 
 from mteb.abstasks.task_metadata import TaskType
 
+logger = logging.getLogger(__name__)
+
 
 def _text_plot(text: str):
     """Returns empty scatter plot with text added, this can be great for error messages."""
@@ -29,16 +32,17 @@ def _failsafe_plot(fun):
         try:
             return fun(*args, **kwargs)
         except Exception as e:
+            logger.error(f"Plot generation failed: {e}")
             return _text_plot(f"Couldn't produce plot. Reason: {e}")
 
     return wrapper
 
 
-def _parse_n_params(text: str) -> int:
-    if text.endswith("M"):
-        return float(text[:-1]) * 1e6
-    if text.endswith("B"):
-        return float(text[:-1]) * 1e9
+def _parse_n_params(params: float | None) -> int | float:
+    """Specified in billions."""
+    if params is None or np.isnan(params):
+        return None
+    return int(params * 1e9)
 
 
 def _parse_model_name(name: str) -> str:
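Note: with the table now storing billions as floats, `_parse_n_params` simply inverts `_format_n_parameters` instead of parsing `"7B"`/`"500M"` strings. A sketch of the round trip (both helpers reimplemented locally for illustration):

```python
import numpy as np

def _format_n_parameters(n):  # table side: raw count -> billions
    return round(float(n) / 1e9, 3) if n else None

def _parse_n_params(params):  # plot side: billions -> raw count
    if params is None or np.isnan(params):
        return None
    return int(params * 1e9)

n = 7_000_000_000
assert _parse_n_params(_format_n_parameters(n)) == n  # exact for round values
# Small models round to 3 decimals of a billion, i.e. millions resolution:
assert _parse_n_params(_format_n_parameters(40_800_000)) == 41_000_000
```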
@@ -51,20 +55,14 @@ def _parse_model_name(name: str) -> str:
 
 
 def _parse_float(value) -> float:
-    try:
-        if value == "Infinite":
-            return np.inf
-        else:
-            return float(value)
-    except ValueError:
+    if value is None or np.isnan(value):
         return np.nan
+    return float(value)
 
 
 def _process_max_tokens(x):
-    if pd.isna(x):
+    if pd.isna(x) or x is None or np.isinf(x):
         return "Unknown"
-    if np.isinf(x):
-        return "Infinite"
     return str(int(x))
 
 
@@ -112,7 +110,7 @@ def _add_size_guide(fig: go.Figure):
 @_failsafe_plot
 def _performance_size_plot(df: pd.DataFrame) -> go.Figure:
     df = df.copy()
-    df["Number of Parameters"] = df["Number of Parameters"].map(_parse_n_params)
+    df["Number of Parameters"] = df["Number of Parameters (B)"].map(_parse_n_params)
     df["Model"] = df["Model"].map(_parse_model_name)
     df["model_text"] = df["Model"].where(df["Model"].isin(models_to_annotate), "")
     df["Embedding Dimensions"] = df["Embedding Dimensions"].map(_parse_float)
mteb/leaderboard/table.py CHANGED
@@ -120,6 +120,14 @@ def apply_per_task_styling_from_benchmark(
     return _apply_per_task_table_styling(per_task_df)
 
 
+def _style_number_of_parameters(num_params: float) -> str:
+    """Anything bigger than 1B is shown in billions with 1 decimal (e.g. 1.712 > 1.7) while anything smaller as 0.xxx B (e.g. 0.345 remains 0.345)"""
+    if num_params >= 1:
+        return f"{num_params:.1f}"
+    else:
+        return f"{num_params:.3f}"
+
+
 def _apply_summary_table_styling(joint_table: pd.DataFrame) -> gr.DataFrame:
     """Apply styling to a raw summary DataFrame
 
@@ -130,7 +138,7 @@ def _apply_summary_table_styling(joint_table: pd.DataFrame) -> gr.DataFrame:
         "Rank (Borda)",
         "Rank",
         "Model",
-        "Number of Parameters",
+        "Number of Parameters (B)",
         "Embedding Dimensions",
         "Max Tokens",
         "Memory Usage (MB)",
@@ -156,7 +164,14 @@ def _apply_summary_table_styling(joint_table: pd.DataFrame) -> gr.DataFrame:
     joint_table[score_columns] = joint_table[score_columns].map(_format_scores)
 
     joint_table_style = joint_table.style.format(
-        {**dict.fromkeys(score_columns, "{:.2f}"), "Rank (Borda)": "{:.0f}"},
+        {
+            **dict.fromkeys(score_columns, "{:.2f}"),
+            "Rank (Borda)": "{:.0f}",
+            "Memory Usage (MB)": "{:.0f}",
+            "Embedding Dimensions": "{:.0f}",
+            "Max Tokens": "{:.0f}",
+            "Number of Parameters (B)": lambda x: _style_number_of_parameters(x),
+        },
         na_rep="",
     )
     joint_table_style = joint_table_style.highlight_min(
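Note: because the metadata columns are now numeric (with missing values for "unknown"), display formatting moves into the pandas `Styler`: `na_rep=""` renders missing entries as empty cells, and each column gets its own format string or callable. A self-contained sketch of the same idea (pandas `Styler` requires jinja2):

```python
import pandas as pd

def _style_number_of_parameters(num_params: float) -> str:
    # >= 1B: one decimal; < 1B: three decimals (values are stored in billions)
    return f"{num_params:.1f}" if num_params >= 1 else f"{num_params:.3f}"

df = pd.DataFrame(
    {
        "Number of Parameters (B)": [1.712, 0.345, None],
        "Max Tokens": [32768.0, 512.0, None],
    }
)
styled = df.style.format(
    {
        "Number of Parameters (B)": _style_number_of_parameters,
        "Max Tokens": "{:.0f}",
    },
    na_rep="",  # unknown metadata renders as an empty cell instead of "Unknown"
)
print(styled.to_html())  # cells: "1.7", "0.345", "" / "32768", "512", ""
```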
@@ -204,8 +219,7 @@ def _apply_summary_table_styling(joint_table: pd.DataFrame) -> gr.DataFrame:
         pinned_columns=2,
         column_widths=column_widths,
         wrap=True,
-        show_fullscreen_button=True,
-        show_copy_button=True,
+        buttons=["copy", "fullscreen"],
         show_search="filter",
     )
 
@@ -227,7 +241,6 @@ def _apply_per_task_table_styling(per_task: pd.DataFrame) -> gr.DataFrame:
         per_task_style,
         interactive=False,
         pinned_columns=1,
-        show_fullscreen_button=True,
-        show_copy_button=True,
+        buttons=["copy", "fullscreen"],
         show_search="filter",
     )
mteb/models/model_implementations/clips_models.py ADDED
@@ -0,0 +1,97 @@
+from mteb.models.model_meta import (
+    ModelMeta,
+    ScoringFunction,
+)
+from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
+
+from .e5_models import ME5_TRAINING_DATA, model_prompts
+
+E5_NL_CITATION = """
+@misc{banar2025mtebnle5nlembeddingbenchmark,
+    archiveprefix = {arXiv},
+    author = {Nikolay Banar and Ehsan Lotfi and Jens Van Nooten and Cristina Arhiliuc and Marija Kliocaite and Walter Daelemans},
+    eprint = {2509.12340},
+    primaryclass = {cs.CL},
+    title = {MTEB-NL and E5-NL: Embedding Benchmark and Models for Dutch},
+    url = {https://arxiv.org/abs/2509.12340},
+    year = {2025},
+}
+"""
+
+e5_nl_small = ModelMeta(
+    loader=sentence_transformers_loader,
+    loader_kwargs=dict(
+        model_prompts=model_prompts,
+    ),
+    name="clips/e5-small-trm-nl",
+    languages=["nld-Latn"],
+    open_weights=True,
+    revision="0243664a6c5e12eef854b091eb283e51833c3e9f",
+    release_date="2025-09-23",
+    n_parameters=40_800_000,
+    memory_usage_mb=78,
+    embed_dim=384,
+    license="mit",
+    max_tokens=512,
+    reference="https://huggingface.co/clips/e5-small-trm-nl",
+    similarity_fn_name=ScoringFunction.COSINE,
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=True,
+    public_training_code="https://github.com/ELotfi/e5-nl",
+    public_training_data="https://huggingface.co/collections/clips/beir-nl",
+    training_datasets=ME5_TRAINING_DATA,  # mMARCO-NL, HotpotQA-NL, FEVER-NL, and LLM generated data
+    adapted_from="intfloat/multilingual-e5-small",
+    citation=E5_NL_CITATION,
+)
+
+e5_nl_base = ModelMeta(
+    loader=sentence_transformers_loader,
+    loader_kwargs=dict(
+        model_prompts=model_prompts,
+    ),
+    name="clips/e5-base-trm-nl",
+    languages=["nld-Latn"],
+    open_weights=True,
+    revision="6bd5722f236da48b4b8bcb28cc1fc478f7089956",
+    release_date="2025-09-23",
+    n_parameters=124_400_000,
+    memory_usage_mb=237,
+    embed_dim=768,
+    license="mit",
+    max_tokens=514,
+    reference="https://huggingface.co/clips/e5-base-trm-nl",
+    similarity_fn_name=ScoringFunction.COSINE,
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=True,
+    public_training_code="https://github.com/ELotfi/e5-nl",
+    public_training_data="https://huggingface.co/collections/clips/beir-nl",
+    adapted_from="intfloat/multilingual-e5-base",
+    training_datasets=ME5_TRAINING_DATA,  # mMARCO-NL, HotpotQA-NL, FEVER-NL, and LLM generated data
+    citation=E5_NL_CITATION,
+)
+
+e5_nl_large = ModelMeta(
+    loader=sentence_transformers_loader,
+    loader_kwargs=dict(
+        model_prompts=model_prompts,
+    ),
+    name="clips/e5-large-trm-nl",
+    languages=["nld-Latn"],
+    open_weights=True,
+    revision="683333f86ed9eb3699b5567f0fdabeb958d412b0",
+    release_date="2025-09-23",
+    n_parameters=355_000_000,
+    memory_usage_mb=1355,
+    embed_dim=1024,
+    license="mit",
+    max_tokens=514,
+    reference="https://huggingface.co/clips/e5-large-trm-nl",
+    similarity_fn_name=ScoringFunction.COSINE,
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=True,
+    public_training_code="https://github.com/ELotfi/e5-nl",
+    public_training_data="https://huggingface.co/collections/clips/beir-nl",
+    training_datasets=ME5_TRAINING_DATA,  # mMARCO-NL, HotpotQA-NL, FEVER-NL, and LLM generated data
+    adapted_from="intfloat/multilingual-e5-large",
+    citation=E5_NL_CITATION,
+)
mteb/models/model_implementations/cohere_models.py CHANGED
@@ -8,6 +8,7 @@ import torch
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
+from mteb._requires_package import requires_package
 from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
@@ -219,6 +220,8 @@ class CohereTextEmbeddingModel(AbsEncoder):
         output_dimension: int | None = None,
         **kwargs,
     ) -> None:
+        requires_package(self, "cohere", model_name, "pip install 'mteb[cohere]'")
+
         import cohere  # type: ignore
 
         self.model_name = model_name.removeprefix("Cohere/Cohere-")
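Note: `requires_package` is mteb's guard for optional dependencies, failing fast with an actionable install hint before the lazy `import cohere` can raise a bare `ImportError`. The general pattern it implements looks roughly like this (hedged sketch; the real helper lives in `mteb._requires_package` and only its call shape is shown in the diff above):

```python
import importlib.util

def requires_package(obj, package: str, model_name: str, install_instruction: str) -> None:
    """Raise a helpful error if an optional dependency is missing (illustrative)."""
    if importlib.util.find_spec(package) is None:
        raise ImportError(
            f"{type(obj).__name__} for '{model_name}' requires the `{package}` "
            f"package. Install it with: {install_instruction}"
        )
```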
mteb/models/model_implementations/google_models.py CHANGED
@@ -147,7 +147,6 @@ class GoogleTextEmbeddingModel(AbsEncoder):
 google_text_emb_004 = ModelMeta(
     loader=GoogleTextEmbeddingModel,  # type: ignore[call-arg]
     loader_kwargs=dict(
-        model_name="text-embedding-004",
         model_prompts=MODEL_PROMPTS,
     ),
     name="google/text-embedding-004",
@@ -172,7 +171,6 @@ google_text_emb_004 = ModelMeta(
 google_text_emb_005 = ModelMeta(
     loader=GoogleTextEmbeddingModel,  # type: ignore[call-arg]
     loader_kwargs=dict(
-        model_name="text-embedding-005",
         model_prompts=MODEL_PROMPTS,
     ),
     name="google/text-embedding-005",
@@ -197,7 +195,6 @@ google_text_emb_005 = ModelMeta(
 google_text_multilingual_emb_002 = ModelMeta(
     loader=GoogleTextEmbeddingModel,  # type: ignore[call-arg]
     loader_kwargs=dict(
-        model_name="text-embedding-002",
         model_prompts=MODEL_PROMPTS,
     ),
     name="google/text-multilingual-embedding-002",
@@ -222,7 +219,6 @@ google_text_multilingual_emb_002 = ModelMeta(
 google_gemini_embedding_001 = ModelMeta(
     loader=GoogleTextEmbeddingModel,  # type: ignore[call-arg]
     loader_kwargs=dict(
-        model_name="gemini-embedding-001",
         model_prompts=MODEL_PROMPTS,
     ),
     name="google/gemini-embedding-001",
mteb/models/model_implementations/kennethenevoldsen_models.py ADDED
@@ -0,0 +1,72 @@
+from mteb.models.model_meta import ModelMeta, ScoringFunction
+from mteb.models.sentence_transformer_wrapper import (
+    sentence_transformers_loader,
+)
+
+dfm_enc_large = ModelMeta(
+    loader=sentence_transformers_loader,  # type: ignore
+    name="KennethEnevoldsen/dfm-sentence-encoder-large",
+    languages=["dan-Latn"],
+    open_weights=True,
+    revision="132c53391e7a780dc6a2f9a03724d0158fe7122c",
+    release_date="2023-07-12",
+    n_parameters=355087360,
+    memory_usage_mb=1554,
+    embed_dim=1024,
+    license="mit",
+    max_tokens=512,
+    reference="https://huggingface.co/KennethEnevoldsen/dfm-sentence-encoder-large",
+    similarity_fn_name=ScoringFunction.COSINE,
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=False,
+    superseded_by=None,
+    adapted_from="chcaa/dfm-encoder-large-v1",
+    training_datasets=set(),  # just contrastive pre-training
+    public_training_code="https://huggingface.co/KennethEnevoldsen/dfm-sentence-encoder-large#hyperparameters",
+    citation="""@article{enevoldsenScandinavianEmbeddingBenchmarks2024,
+    title = {The {Scandinavian} {Embedding} {Benchmarks}: {Comprehensive} {Assessment} of {Multilingual} and {Monolingual} {Text} {Embedding}},
+    shorttitle = {The {Scandinavian} {Embedding} {Benchmarks}},
+    url = {https://openreview.net/forum?id=pJl_i7HIA72},
+    language = {en},
+    urldate = {2024-04-12},
+    author = {Enevoldsen, Kenneth and Kardos, Márton and Muennighoff, Niklas and Nielbo, Kristoffer},
+    month = feb,
+    year = {2024},
+}
+""",
+    public_training_data="https://huggingface.co/datasets/danish-foundation-models/danish-gigaword",  # paragraphs extracted from Danish Gigaword
+)
+
+dfm_enc_med = ModelMeta(
+    loader=sentence_transformers_loader,  # type: ignore
+    name="KennethEnevoldsen/dfm-sentence-encoder-medium",
+    languages=["dan-Latn"],
+    open_weights=True,
+    revision="701bce95d499fa97610d57e8823c54fd1fb79930",
+    release_date="2023-07-12",
+    n_parameters=124445952,
+    memory_usage_mb=475,
+    embed_dim=768,
+    license="mit",
+    max_tokens=512,
+    reference="https://huggingface.co/KennethEnevoldsen/dfm-sentence-encoder-medium",
+    similarity_fn_name=ScoringFunction.COSINE,
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=False,
+    superseded_by=None,
+    adapted_from=None,
+    public_training_code=None,
+    training_datasets=set(),  # just contrastive pre-training
+    citation="""@article{enevoldsenScandinavianEmbeddingBenchmarks2024,
+    title = {The {Scandinavian} {Embedding} {Benchmarks}: {Comprehensive} {Assessment} of {Multilingual} and {Monolingual} {Text} {Embedding}},
+    shorttitle = {The {Scandinavian} {Embedding} {Benchmarks}},
+    url = {https://openreview.net/forum?id=pJl_i7HIA72},
+    language = {en},
+    urldate = {2024-04-12},
+    author = {Enevoldsen, Kenneth and Kardos, Márton and Muennighoff, Niklas and Nielbo, Kristoffer},
+    month = feb,
+    year = {2024},
+}
+""",
+    public_training_data=None,
+)
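Note: with the metadata registered, the Danish encoders resolve through the normal model registry; a quick usage sketch (assumes `mteb.get_model_meta` / `mteb.get_model` from mteb's public API; loading the model downloads the weights from Hugging Face):

```python
import mteb

# Registered in this release via kennethenevoldsen_models.py
meta = mteb.get_model_meta("KennethEnevoldsen/dfm-sentence-encoder-large")
print(meta.n_parameters, meta.embed_dim)  # 355087360 1024

model = mteb.get_model("KennethEnevoldsen/dfm-sentence-encoder-large")
```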
mteb/models/model_implementations/linq_models.py CHANGED
@@ -1,7 +1,7 @@
 import torch
 
+from mteb.models.instruct_wrapper import instruct_wrapper
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.models.sentence_transformer_wrapper import SentenceTransformerEncoderWrapper
 from mteb.types import PromptType
 
 from .e5_instruct import E5_MISTRAL_TRAINING_DATA
@@ -22,7 +22,7 @@ def instruction_template(
 
 
 Linq_Embed_Mistral = ModelMeta(
-    loader=SentenceTransformerEncoderWrapper,
+    loader=instruct_wrapper,
     loader_kwargs=dict(
         instruction_template=instruction_template,
         attn="cccc",
mteb/models/model_implementations/ru_sentence_models.py CHANGED
@@ -43,6 +43,10 @@ GIGA_task_prompts = {
         "query": "Given a news title, retrieve relevant news article",
         "document": "",
     },
+    "RiaNewsRetrievalHardNegatives.v2": {
+        "query": "Given a news title, retrieve relevant news article",
+        "document": "",
+    },
     "MIRACLReranking": {
         "query": "Given a question, retrieve Wikipedia passages that answer the question",
         "document": "",
@@ -51,6 +55,10 @@ GIGA_task_prompts = {
         "query": "Given a question, retrieve Wikipedia passages that answer the question",
         "document": "",
     },
+    "MIRACLRetrievalHardNegatives.v2": {
+        "query": "Given a question, retrieve Wikipedia passages that answer the question",
+        "document": "",
+    },
     "ArguAna": {
         "query": "Given a search query, retrieve passages that answer the question",
         "document": "Given a search query, retrieve passages that answer the question",
@@ -755,6 +763,7 @@ frida_prompts = {
     "SensitiveTopicsClassification": "categorize_topic: ",
     "TERRa": "categorize_entailment: ",
     "RiaNewsRetrieval": "categorize: ",
+    "RiaNewsRetrievalHardNegatives.v2": "",
 }
 
 frida_training_datasets = {
mteb-2.3.1.dist-info/METADATA → mteb-2.3.3.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mteb
-Version: 2.3.1
+Version: 2.3.3
 Summary: Massive Text Embedding Benchmark
 Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
 Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
@@ -37,7 +37,7 @@ Requires-Dist: torchvision>0.2.1; extra == "image"
 Provides-Extra: codecarbon
 Requires-Dist: codecarbon<3.0.0,>=2.0.0; extra == "codecarbon"
 Provides-Extra: leaderboard
-Requires-Dist: gradio==5.49.1; extra == "leaderboard"
+Requires-Dist: gradio==6.0.1; extra == "leaderboard"
 Requires-Dist: plotly<6.0.0,>=5.24.0; extra == "leaderboard"
 Requires-Dist: cachetools>=5.2.0; extra == "leaderboard"
 Requires-Dist: matplotlib>=3.9.4; extra == "leaderboard"
@@ -108,7 +108,7 @@ Requires-Dist: qwen_vl_utils>=0.0.14; extra == "eager-embed"
 Dynamic: license-file
 
 <h1 align="center">
-    <img src="docs/images/logos/mteb_logo/dots-icon.png" alt="MTEB" width="28" style="vertical-align: middle; margin-right: 10px;"/> MTEB
+    <img src="https://github.com/embeddings-benchmark/mteb/blob/main/docs/images/logos/mteb_logo/dots-icon.png?raw=true" alt="MTEB" width="28" style="vertical-align: middle; margin-right: 10px;"/> MTEB
 </h1>
 
 <h3 align="center" style="border-bottom: none;">Multimodal toolbox for evaluating embeddings and retrieval systems</h3>
@@ -137,7 +137,7 @@ Dynamic: license-file
 
 
 <h3 align="center">
-    <a href="https://huggingface.co/spaces/mteb/leaderboard"><img style="float: middle; padding: 10px 10px 10px 10px;" width="60" height="55" src="./docs/images/logos/hf_logo.png" /></a>
+    <a href="https://huggingface.co/spaces/mteb/leaderboard"><img style="float: middle; padding: 10px 10px 10px 10px;" width="60" height="55" src="https://github.com/embeddings-benchmark/mteb/blob/main/docs/images/logos/hf_logo.png?raw=true" /></a>
 </h3>
 
 
mteb-2.3.1.dist-info/RECORD → mteb-2.3.3.dist-info/RECORD CHANGED
@@ -52,11 +52,11 @@ mteb/abstasks/text/bitext_mining.py,sha256=8m86XHJ3TxguC9itxZRq2Bt_p0NYojojS2Btk
 mteb/abstasks/text/reranking.py,sha256=rfRGRBeSjZLgkh8pneMgRm-vd9NHr5jSFH92YfOHfmU,7776
 mteb/abstasks/text/summarization.py,sha256=KYEb8gh4JjpSsrvGUmQ2VlrVdzzVxIWcitXOJUaHhO4,6954
 mteb/benchmarks/__init__.py,sha256=MQEVeli-zLaJ7Xg0z7RhXQwsdmm7Ht_W2Ln0rZo1Szc,225
-mteb/benchmarks/_create_table.py,sha256=z3iqa5dajLk0DYxEE9EeO1qpR3VJXokg8ZQ2rdUkvdM,20452
+mteb/benchmarks/_create_table.py,sha256=OAiR44ynJ2fMzoBmVITQtOTYQzxIu9KUdS_HzlBlAck,20195
 mteb/benchmarks/benchmark.py,sha256=70RlMyyg_wkWTlU_IbfLl-KaqRWXGCKTd8fWe9X-AQE,4173
 mteb/benchmarks/get_benchmark.py,sha256=-n_O-gitRKZi48gJKNgGuI36hsP7yLVSiwulnMHN7Gw,3935
-mteb/benchmarks/benchmarks/__init__.py,sha256=UD6YjWPDVPSQdUhmD-4rho08Gs5LU9pS_C2jX5eUns0,2102
-mteb/benchmarks/benchmarks/benchmarks.py,sha256=KDJanVYs3BkFn74VHwarZ8HJ2DX6EIgcVYBrlyjbv9I,89956
+mteb/benchmarks/benchmarks/__init__.py,sha256=0ySgD14Mu3Y1nJzazR_eUir81ia3x6E23N57SzQNkF0,2150
+mteb/benchmarks/benchmarks/benchmarks.py,sha256=Ob2cHVXwFk328xbV-2ZmUibiVAMtT2RN1ygGgiP6UNQ,92662
 mteb/benchmarks/benchmarks/rteb_benchmarks.py,sha256=QnCSrTTaBfcRlAQp2Nu81tgv1idMXqiM16Fp2zKJ5Ys,10607
 mteb/cli/__init__.py,sha256=v-csUr3eUZElIvrGB6QGtaIdndDfNWEe9oZchsGsJpg,64
 mteb/cli/_display_tasks.py,sha256=7A06dT9sSoTz6shyMvskPxuc5eHY_H7PGPlROzMP0yw,2196
@@ -1424,10 +1424,10 @@ mteb/languages/language_family.json,sha256=OUGcHeOIPcZPb2FWmYLhxTS0JxjK5y3Fo6x0P
 mteb/languages/language_scripts.py,sha256=5wix9HTYolNIpTiS5oXf2pGJyL7ftdGKs_m432w81V8,3998
 mteb/languages/programming_languages.py,sha256=zxAakT3OSUnAuTnQ34VyeFIECnNXMlleZmAake6jsZE,211
 mteb/leaderboard/__init__.py,sha256=991roXmtRwEQysV-37hWEzWpkvPgMCGRqZTHR-hm2io,88
-mteb/leaderboard/app.py,sha256=EsQ_qoJ26yJbg2qExKFFAx90R8VYOO6GbLtIzFuHGpE,32642
-mteb/leaderboard/benchmark_selector.py,sha256=hnXdo_Kj4UUAruFl6nZkCxAQ88IEfbaH8EADFJMMdVo,7686
-mteb/leaderboard/figures.py,sha256=Rq20LFpaUhQD4tuKp7P7ExQtAjonMLibgO3ud0ykMag,7491
-mteb/leaderboard/table.py,sha256=qs0H_Gt9FzRvzb-AL0YlqEe0YAsdYsVX3QlncfCBEqg,7828
+mteb/leaderboard/app.py,sha256=29MxFLKEVT-roULHG5boHmsQVhld1rDGNS94r7MWlz8,33118
+mteb/leaderboard/benchmark_selector.py,sha256=uH66SI0iT1J4_fnebViWa83dQwhPi7toBv7PRL_epDw,7784
+mteb/leaderboard/figures.py,sha256=cfOK82rRf-7sCjyP7GBxh4ezhOIt0OhD0_86mKtzLrg,7530
+mteb/leaderboard/table.py,sha256=6SnrYC5GcBlvVSO6vOk6ObuqtoveBLv3JUuXqdKueG8,8333
 mteb/leaderboard/text_segments.py,sha256=iMIkS04QQjPbT-SkU0x6fOcS8xRbUYevryu9HydipKM,6570
 mteb/models/__init__.py,sha256=ABTuoqiBjBtBWW3LYY7ItBHdylR6jWoy06HH0g6j6fU,910
 mteb/models/abs_encoder.py,sha256=m0JkRfRPMYadDgBR9eozRloI31ZSWkSzDFINpwbfLZk,16533
@@ -1460,9 +1460,10 @@ mteb/models/model_implementations/bmretriever_models.py,sha256=ABfrACa028Dcujan7
 mteb/models/model_implementations/cadet_models.py,sha256=bDula_VroXOWgSw-tquvNVGcGg7_Z1xHnoTDn6OGOYU,2225
 mteb/models/model_implementations/cde_models.py,sha256=3nNU3nq3VZZcImFqH1VPj57-QJNMU6Ei2C_HCaicuUs,9012
 mteb/models/model_implementations/clip_models.py,sha256=zrfgNmZszu0JMtMNdCMzEohixsrnQ7xFhCqgsiucH_Q,6107
+mteb/models/model_implementations/clips_models.py,sha256=QwwoU4Zu_zwUgUg7Hn2lzpXK-GjXIST0qF_2oRxHm2Y,3410
 mteb/models/model_implementations/codefuse_models.py,sha256=19Y-d_qetVU64quzEvuUJ_K8DHo1JEEKEGqjRR48dFg,9113
 mteb/models/model_implementations/codesage_models.py,sha256=D4CdISGyv5f2GMYq4_efgm5qNq80SWAX5R2u5mjEiXM,2998
-mteb/models/model_implementations/cohere_models.py,sha256=LiYYRT3clhFlh0RE654KyZtO66vnIO22h79HJLmXYwk,13696
+mteb/models/model_implementations/cohere_models.py,sha256=OWFClVAN4phjBoxfGGDyGDmzMu-t2VrjCGFyAIWmz4w,13832
 mteb/models/model_implementations/cohere_v.py,sha256=K6VEw1NkyM2PuMd18kHE6aqPrcByYSwEmAKjvLods_w,15760
 mteb/models/model_implementations/colpali_models.py,sha256=7PJ0SshVXasyncTfZRFIf_ZWzbqxJhhzNKAoGLhNktw,9004
 mteb/models/model_implementations/colqwen_models.py,sha256=6upaxe19V8j5Ayu03Dgj5jPtC8SJBCITK_RionJRMSE,15545
@@ -1480,7 +1481,7 @@ mteb/models/model_implementations/evaclip_models.py,sha256=cPMGYLDIq4s8zJxb4vPXq
 mteb/models/model_implementations/fa_models.py,sha256=WGal70_ezITWoNdjcMdbOCTSCtoaXzuPadYstLVXxhg,7478
 mteb/models/model_implementations/geogpt_models.py,sha256=Juv86SwhgQX80lVLjAFtim2aSiJT1AcgjniyyiKyk1Q,1923
 mteb/models/model_implementations/gme_v_models.py,sha256=NkfgR3_UdZzoBt1NnalVou6LOR-F7qXM4by9EbAVrys,13568
-mteb/models/model_implementations/google_models.py,sha256=ROo83udaUmPx0U_qfFuS55DSrCILVsRZu3oLp_P-srg,9296
+mteb/models/model_implementations/google_models.py,sha256=7QfsaJ5JNDRQxFl7Zh2AtiR2PR7PZcfeCBgviuOFBCo,9130
 mteb/models/model_implementations/granite_vision_embedding_models.py,sha256=uqQ5-e_a-ADv3gf3sR9Drk0S4x8Gy8mZkpL-E4X16TM,7241
 mteb/models/model_implementations/gritlm_models.py,sha256=aS_CuioL95JAQMYiaKlGuAWU9wZjabn268Xut3bD8-w,3005
 mteb/models/model_implementations/gte_models.py,sha256=o26Xyu_tucUlP435Q_jB4-bl0xckgj4wtbutTwhYgIo,10073
@@ -1492,9 +1493,10 @@ mteb/models/model_implementations/jasper_models.py,sha256=ZY7qRRpBpD3eVryQb4rLs5
 mteb/models/model_implementations/jina_clip.py,sha256=CfiIxbhKspjQajNtObCfGPHOWPk6uLn4cuwydQHFTMo,5118
 mteb/models/model_implementations/jina_models.py,sha256=HrHm2Io3g9gHwxU5icAaudy_E8rAVkAAIFSzVYWF-dM,34859
 mteb/models/model_implementations/kalm_models.py,sha256=FmW7Z5Qs6WYBLuKvql3u4IJW36kj4k-Ypah8qTBEBkg,59837
+mteb/models/model_implementations/kennethenevoldsen_models.py,sha256=DF-9nmsewYO9ikZ0kV81ujKGr7Ot36-9iPoxN7KX2mY,2993
 mteb/models/model_implementations/lens_models.py,sha256=fC7_NB1F8vBAlXD0p0-hALf6eZTPFJwpz57dy71OlwI,1696
 mteb/models/model_implementations/lgai_embedding_models.py,sha256=S83pbfkMH3YUNl4skusgbK-Rn-uLuScQVxgXwegR_N4,2333
-mteb/models/model_implementations/linq_models.py,sha256=rnW27MybLMQ2Y3OxDyBTMSIsx_hXC0DlMD4kFv7NJV0,1918
+mteb/models/model_implementations/linq_models.py,sha256=EtvUyiNbjU-GJd1kS0Z0gBACkP2pFOjk0KfGMZz4K9Y,1872
 mteb/models/model_implementations/listconranker.py,sha256=pFISrZ91NHsnhc5El5U_ZPsB9cSTuTY8-nDzpoNMC9s,4485
 mteb/models/model_implementations/llm2clip_models.py,sha256=_sqAOb5oSbxn1oaXjWwPXRjTvxLT48xXL_tuabt2Ks0,9265
 mteb/models/model_implementations/llm2vec_models.py,sha256=Og_EqnOXgIfaTcVTl3Lj5BicG83ycnXS_YHNtK63I-A,12638
@@ -1531,7 +1533,7 @@ mteb/models/model_implementations/repllama_models.py,sha256=89HoqEpzkNysHeuf_-Yh
 mteb/models/model_implementations/rerankers_custom.py,sha256=ro73A9-hHudy3_qIMrhP-ja-3Xqu78r_aORm856zHQc,10651
 mteb/models/model_implementations/rerankers_monot5_based.py,sha256=rxVwzapNnHl4gCw79XVCaTXj3-wbToyj7XVL97tpAF4,34302
 mteb/models/model_implementations/richinfoai_models.py,sha256=llvYa0JUjyOOMbuTgOYoJ2qeqZ5rLHX1ZjZIYlYbdvA,989
-mteb/models/model_implementations/ru_sentence_models.py,sha256=Dstx46xFcAOC7giKPclC41GJTtFfmg4t6gLTdAnrxDk,40129
+mteb/models/model_implementations/ru_sentence_models.py,sha256=GuZFwbzaooufvSMGNjIsL0DDLrqHjhdSsAQHHZo5H08,40480
 mteb/models/model_implementations/salesforce_models.py,sha256=KslTK-IKeLvNG-vQir9k6swkaOgjk6eyozm_BOVgTpY,5160
 mteb/models/model_implementations/samilpwc_models.py,sha256=oMwKNwCxoH1jZgCy04oo2oVlBZWu253QMpnEEC6emz8,2021
 mteb/models/model_implementations/searchmap_models.py,sha256=XvVl99emIgnNUCxkTuFQXW6py2R8vgsArfpyHveCugw,1904
@@ -2567,9 +2569,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
 mteb/types/_result.py,sha256=CRAUc5IvqI3_9SyXDwv-PWLCXwXdZem9RePeYESRtuw,996
 mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
 mteb/types/statistics.py,sha256=YwJsxTf1eaCI_RE-J37a-gK5wDeGAsmkeZKoZCFihSo,3755
-mteb-2.3.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-mteb-2.3.1.dist-info/METADATA,sha256=AeDGGuksA6YmVR7zGXWB1jbk2mUD3w5tRCgTZjTnZ4U,13798
-mteb-2.3.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-mteb-2.3.1.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
-mteb-2.3.1.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
-mteb-2.3.1.dist-info/RECORD,,
+mteb-2.3.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+mteb-2.3.3.dist-info/METADATA,sha256=LbvRqywjhaqAK4910G8ueME52YrrqFzvm4NXl2M3MBA,13923
+mteb-2.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+mteb-2.3.3.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
+mteb-2.3.3.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
+mteb-2.3.3.dist-info/RECORD,,
File without changes