mteb 2.3.8__py3-none-any.whl → 2.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mteb/benchmarks/_create_table.py CHANGED
@@ -1,5 +1,6 @@
 import re
 from collections import defaultdict
+from typing import Literal

 import numpy as np
 import pandas as pd
@@ -241,6 +242,65 @@ def _create_per_task_table_from_benchmark_results(
     return per_task


+def _create_per_language_table_from_benchmark_results(
+    benchmark_results: BenchmarkResults,
+    language_view: list[str] | Literal["all"],
+) -> pd.DataFrame:
+    """Create per-language table from BenchmarkResults.
+
+    Returns a DataFrame with one row per model and one column per language.
+
+    Args:
+        benchmark_results: BenchmarkResults object containing model results
+        language_view: List of languages to include in the per-language table, or "all" for all languages present in the results
+    Returns:
+        DataFrame with per-language scores, ready for styling in the leaderboard
+    """
+    if language_view != "all" and not isinstance(language_view, list):
+        raise ValueError("language_view must be a list of languages or 'all'")
+
+    data = benchmark_results.to_dataframe(aggregation_level="language", format="long")
+
+    if data.empty:
+        no_results_frame = pd.DataFrame(
+            {"No results": ["You can try relaxing your criteria"]}
+        )
+        return no_results_frame
+
+    if language_view != "all":
+        data = data[data["language"].isin(language_view)]
+
+    per_language = data.pivot_table(
+        index="model_name", columns="language", values="score", aggfunc="mean"
+    )
+
+    to_remove = per_language.isna().all(axis="columns")
+    if to_remove.all():
+        no_results_frame = pd.DataFrame(
+            {"No results": ["You can try relaxing your criteria"]}
+        )
+        return no_results_frame
+
+    models_to_remove = list(per_language[to_remove].index)
+    per_language = per_language.drop(models_to_remove, axis=0)
+
+    per_language["borda_rank"] = _get_borda_rank(per_language)
+    per_language = per_language.sort_values("borda_rank", ascending=True)
+    per_language = per_language.drop(columns=["borda_rank"])
+    per_language = per_language.reset_index()
+
+    per_language["model_name"] = per_language["model_name"].map(
+        lambda name: name.split("/")[-1]
+    )
+    per_language = per_language.rename(
+        columns={
+            "model_name": "Model",
+        }
+    )
+
+    return per_language
+
+
 def _create_summary_table_mean_public_private(
     benchmark_results: BenchmarkResults,
 ) -> pd.DataFrame:
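
The heart of the new helper is the long-to-wide pivot. A minimal, self-contained sketch of just that step, using toy model names and scores rather than real mteb output:

```python
# Toy long-format rows: one (model, language, score) per row, as produced
# by to_dataframe(aggregation_level="language", format="long").
import pandas as pd

data = pd.DataFrame(
    {
        "model_name": ["org/model-a", "org/model-a", "org/model-b", "org/model-b"],
        "language": ["dan-Latn", "swe-Latn", "dan-Latn", "swe-Latn"],
        "score": [0.61, 0.58, 0.66, 0.55],
    }
)

# One row per model, one column per language; the mean handles a language
# that appears in several tasks.
per_language = data.pivot_table(
    index="model_name", columns="language", values="score", aggfunc="mean"
)
print(per_language)
# language     dan-Latn  swe-Latn
# model_name
# org/model-a      0.61      0.58
# org/model-b      0.66      0.55
```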
mteb/benchmarks/benchmark.py CHANGED
@@ -1,10 +1,11 @@
 from collections.abc import Iterable, Sequence
-from dataclasses import dataclass
-from typing import TYPE_CHECKING
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Literal

 import pandas as pd

 from mteb.benchmarks._create_table import (
+    _create_per_language_table_from_benchmark_results,
    _create_per_task_table_from_benchmark_results,
    _create_summary_table_from_benchmark_results,
    _create_summary_table_mean_public_private,
@@ -50,6 +51,7 @@ class Benchmark:
     display_on_leaderboard: bool = True
     icon: str | None = None
     display_name: str | None = None
+    language_view: list[str] | Literal["all"] = field(default_factory=list)

     def __iter__(self) -> Iterable["AbsTask"]:
         return iter(self.tasks)
@@ -80,6 +82,28 @@ class Benchmark:
         """
         return _create_per_task_table_from_benchmark_results(benchmark_results)

+    def _create_per_language_table(
+        self, benchmark_results: BenchmarkResults
+    ) -> pd.DataFrame:
+        """Create per-language table. Called by the leaderboard app.
+
+        Returns:
+            A pandas DataFrame representing the per-language results.
+        """
+        if self.language_view == "all" or len(self.language_view) > 0:
+            return _create_per_language_table_from_benchmark_results(
+                benchmark_results, self.language_view
+            )
+        else:
+            no_results_frame = pd.DataFrame(
+                {
+                    "No results": [
+                        "The per-language table is not available for this benchmark."
+                    ]
+                }
+            )
+            return no_results_frame
+

 class RtebBenchmark(Benchmark):
     """Wrapper for RTEB benchmark."""
mteb/benchmarks/benchmarks/benchmarks.py CHANGED
@@ -471,6 +471,7 @@ SEB = Benchmark(
     name="MTEB(Scandinavian, v1)",
     display_name="Scandinavian",
     icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/dk.svg",
+    language_view=["dan-Latn", "swe-Latn", "nno-Latn", "nob-Latn"],
     tasks=get_tasks(
         tasks=[
             # Bitext
@@ -953,6 +954,28 @@ MTEB_multilingual_v1 = Benchmark(
 MTEB_multilingual_v2 = Benchmark(
     name="MTEB(Multilingual, v2)",
     display_name="Multilingual",
+    language_view=[
+        "eng-Latn",  # English
+        "zho-Hans",  # Chinese (Simplified)
+        "hin-Deva",  # Hindi
+        "spa-Latn",  # Spanish
+        "fra-Latn",  # French
+        "ara-Arab",  # Arabic
+        "ben-Beng",  # Bengali
+        "rus-Cyrl",  # Russian
+        "por-Latn",  # Portuguese
+        "urd-Arab",  # Urdu
+        "ind-Latn",  # Indonesian
+        "deu-Latn",  # German
+        "jpn-Jpan",  # Japanese
+        "swa-Latn",  # Swahili
+        "mar-Deva",  # Marathi
+        "tel-Telu",  # Telugu
+        "tur-Latn",  # Turkish
+        "tam-Taml",  # Tamil
+        "vie-Latn",  # Vietnamese
+        "kor-Hang",  # Korean
+    ],
     icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-gui-globe.svg",
     tasks=mteb_multilingual_tasks,
     description="A large-scale multilingual expansion of MTEB, driven mainly by highly-curated community contributions covering 250+ languages. ",
@@ -2283,6 +2306,14 @@ VIDORE_V2 = Benchmark(
 VIDORE_V3 = VidoreBenchmark(
     name="ViDoRe(v3)",
     display_name="ViDoRe V3",
+    language_view=[
+        "deu-Latn",
+        "eng-Latn",
+        "fra-Latn",
+        "ita-Latn",
+        "por-Latn",
+        "spa-Latn",
+    ],
     icon="https://cdn-uploads.huggingface.co/production/uploads/66e16a677c2eb2da5109fb5c/x99xqw__fl2UaPbiIdC_f.png",
     tasks=get_tasks(
         tasks=[
mteb/leaderboard/app.py CHANGED
@@ -24,6 +24,7 @@ from mteb.leaderboard.benchmark_selector import (
 )
 from mteb.leaderboard.figures import _performance_size_plot, _radar_chart
 from mteb.leaderboard.table import (
+    apply_per_language_styling_from_benchmark,
     apply_per_task_styling_from_benchmark,
     apply_summary_styling_from_benchmark,
 )
@@ -361,6 +362,13 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
     per_task_table = apply_per_task_styling_from_benchmark(
         default_benchmark, filtered_benchmark_results
     )
+    per_language_table = apply_per_language_styling_from_benchmark(
+        default_benchmark,
+        filtered_benchmark_results,
+    )
+
+    # Check if this benchmark displays per-language results
+    display_language_table = len(default_benchmark.language_view) > 0

     lang_select = gr.CheckboxGroup(
         sorted(default_results.languages),
@@ -554,6 +562,16 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
                 download_per_task.click(
                     _download_table, inputs=[per_task_table], outputs=[download_per_task]
                 )
+            with gr.Tab(
+                "Performance per language", visible=display_language_table
+            ) as language_tab:
+                per_language_table.render()
+                download_per_language = gr.DownloadButton("Download Table")
+                download_per_language.click(
+                    _download_table,
+                    inputs=[per_language_table],
+                    outputs=[download_per_language],
+                )
             with gr.Tab("Task information"):
                 task_info_table = gr.DataFrame(_update_task_info, inputs=[task_select])  # noqa: F841

@@ -879,9 +897,18 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
        per_task = apply_per_task_styling_from_benchmark(
            benchmark, filtered_benchmark_results
        )
+        per_language = apply_per_language_styling_from_benchmark(
+            benchmark,
+            filtered_benchmark_results,
+        )
        elapsed = time.time() - start_time
        logger.debug(f"update_tables callback: {elapsed}s")
-        return summary, per_task
+        return (
+            summary,
+            per_task,
+            per_language,
+            gr.update(visible=len(benchmark.language_view) > 0),
+        )

    # Only update tables when models change, not when scores/tasks change directly
    # This avoids redundant updates since scores/tasks changes trigger update_models
@@ -890,7 +917,12 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
        item.change(
            update_tables,
            inputs=[scores, task_select, models, benchmark_select],
-            outputs=[summary_table, per_task_table],
+            outputs=[
+                summary_table,
+                per_task_table,
+                per_language_table,
+                language_tab,
+            ],
        )

    gr.Markdown(ACKNOWLEDGEMENT, elem_id="ack_markdown")
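
The tab wiring relies on two Gradio features: a `Tab` can be created hidden, and a callback can return `gr.update(visible=...)` targeting it. A minimal sketch of that pattern (component names are illustrative; exact behavior may vary slightly across Gradio 4/5 releases):

```python
import gradio as gr

with gr.Blocks() as demo:
    show = gr.Checkbox(label="Show per-language tab", value=False)
    # Created hidden, like the leaderboard's language tab.
    with gr.Tab("Performance per language", visible=False) as language_tab:
        gr.Markdown("Per-language scores would render here.")
    # Returning gr.update(visible=...) from the callback toggles the tab.
    show.change(lambda v: gr.update(visible=v), inputs=[show], outputs=[language_tab])

# demo.launch()
```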
mteb/leaderboard/table.py CHANGED
@@ -120,6 +120,31 @@ def apply_per_task_styling_from_benchmark(
     return _apply_per_task_table_styling(per_task_df)


+def apply_per_language_styling_from_benchmark(
+    benchmark_instance: Benchmark, benchmark_results: BenchmarkResults
+) -> gr.DataFrame:
+    """Apply styling to the per-language table created by the benchmark instance's _create_per_language_table method.
+
+    This supports polymorphism - different benchmark classes can have different table generation logic.
+
+    Args:
+        benchmark_instance: The benchmark instance
+        benchmark_results: BenchmarkResults object containing model results (may be pre-filtered)
+
+    Returns:
+        Styled gr.DataFrame ready for display in the leaderboard
+    """
+    # Use the instance method to support polymorphism
+    per_language_df = benchmark_instance._create_per_language_table(benchmark_results)
+
+    # If it's a no-results DataFrame, return it as-is
+    if "No results" in per_language_df.columns:
+        return gr.DataFrame(per_language_df)
+
+    # Apply the styling
+    return _apply_per_language_table_styling(per_language_df)
+
+
 def _style_number_of_parameters(num_params: float) -> str:
     """Anything bigger than 1B is shown in billions with 1 decimal (e.g. 1.712 > 1.7) while anything smaller as 0.xxx B (e.g. 0.345 remains 0.345)"""
     if num_params >= 1:
@@ -237,10 +262,47 @@ def _apply_per_task_table_styling(per_task: pd.DataFrame) -> gr.DataFrame:
         "{:.2f}", subset=task_score_columns, na_rep=""
     ).highlight_max(subset=task_score_columns, props="font-weight: bold")

+    # setting task name column width to 250px
+    column_widths = _get_column_widths(per_task_style.data)
+    if len(column_widths) > 0:
+        column_widths[0] = "250px"
+
     return gr.DataFrame(
         per_task_style,
         interactive=False,
         pinned_columns=1,
+        column_widths=column_widths,
+        buttons=["copy", "fullscreen"],
+        show_search="filter",
+    )
+
+
+def _apply_per_language_table_styling(per_language: pd.DataFrame) -> gr.DataFrame:
+    """Apply styling to a raw per-language DataFrame
+
+    Returns:
+        Styled gr.DataFrame ready for display in the leaderboard
+    """
+    language_score_columns = per_language.select_dtypes("number").columns
+    per_language[language_score_columns] *= 100
+
+    if len(per_language.columns) > 100:  # Avoid gradio error on very wide tables
+        per_language_style = per_language.round(2)
+    else:
+        per_language_style = per_language.style.format(
+            "{:.2f}", subset=language_score_columns, na_rep=""
+        ).highlight_max(subset=language_score_columns, props="font-weight: bold")
+
+    # setting model name column width to 250px
+    column_widths = _get_column_widths(per_language_style.data)
+    if len(column_widths) > 0:
+        column_widths[0] = "250px"
+
+    return gr.DataFrame(
+        per_language_style,
+        interactive=False,
+        pinned_columns=1,
+        column_widths=column_widths,
         buttons=["copy", "fullscreen"],
         show_search="filter",
     )
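
The styling path is plain pandas `Styler` before the result is handed to `gr.DataFrame`. A self-contained sketch of the format-and-bold step, on toy data with scores already scaled to 0-100 as above:

```python
import pandas as pd

df = pd.DataFrame(
    {"Model": ["model-a", "model-b"], "dan-Latn": [61.0, 66.2], "swe-Latn": [58.1, 55.0]}
)
score_cols = df.select_dtypes("number").columns

# Two decimals, blanks for NaN, and a bold face on each column's maximum.
styled = df.style.format("{:.2f}", subset=score_cols, na_rep="").highlight_max(
    subset=score_cols, props="font-weight: bold"
)
# styled.to_html() now bolds 66.20 (dan-Latn) and 58.10 (swe-Latn).
```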
mteb/models/model_implementations/e5_models.py CHANGED
@@ -5,108 +5,10 @@ from mteb.models.model_meta import (
 from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
 from mteb.types import PromptType

+from .facebookai import XLMR_LANGUAGES
+
 E5_PAPER_RELEASE_DATE = "2024-02-08"
-XLMR_LANGUAGES = [
-    "afr-Latn",
-    "amh-Latn",
-    "ara-Latn",
-    "asm-Latn",
-    "aze-Latn",
-    "bel-Latn",
-    "bul-Latn",
-    "ben-Latn",
-    "ben-Beng",
-    "bre-Latn",
-    "bos-Latn",
-    "cat-Latn",
-    "ces-Latn",
-    "cym-Latn",
-    "dan-Latn",
-    "deu-Latn",
-    "ell-Latn",
-    "eng-Latn",
-    "epo-Latn",
-    "spa-Latn",
-    "est-Latn",
-    "eus-Latn",
-    "fas-Latn",
-    "fin-Latn",
-    "fra-Latn",
-    "fry-Latn",
-    "gle-Latn",
-    "gla-Latn",
-    "glg-Latn",
-    "guj-Latn",
-    "hau-Latn",
-    "heb-Latn",
-    "hin-Latn",
-    "hin-Deva",
-    "hrv-Latn",
-    "hun-Latn",
-    "hye-Latn",
-    "ind-Latn",
-    "isl-Latn",
-    "ita-Latn",
-    "jpn-Latn",
-    "jav-Latn",
-    "kat-Latn",
-    "kaz-Latn",
-    "khm-Latn",
-    "kan-Latn",
-    "kor-Latn",
-    "kur-Latn",
-    "kir-Latn",
-    "lat-Latn",
-    "lao-Latn",
-    "lit-Latn",
-    "lav-Latn",
-    "mlg-Latn",
-    "mkd-Latn",
-    "mal-Latn",
-    "mon-Latn",
-    "mar-Latn",
-    "msa-Latn",
-    "mya-Latn",
-    "nep-Latn",
-    "nld-Latn",
-    "nob-Latn",
-    "orm-Latn",
-    "ori-Latn",
-    "pan-Latn",
-    "pol-Latn",
-    "pus-Latn",
-    "por-Latn",
-    "ron-Latn",
-    "rus-Latn",
-    "san-Latn",
-    "snd-Latn",
-    "sin-Latn",
-    "slk-Latn",
-    "slv-Latn",
-    "som-Latn",
-    "sqi-Latn",
-    "srp-Latn",
-    "sun-Latn",
-    "swe-Latn",
-    "swa-Latn",
-    "tam-Latn",
-    "tam-Taml",
-    "tel-Latn",
-    "tel-Telu",
-    "tha-Latn",
-    "tgl-Latn",
-    "tur-Latn",
-    "uig-Latn",
-    "ukr-Latn",
-    "urd-Latn",
-    "urd-Arab",
-    "uzb-Latn",
-    "vie-Latn",
-    "xho-Latn",
-    "yid-Latn",
-    "zho-Hant",
-    "zho-Hans",
-]
+

 MULTILINGUAL_E5_CITATION = """
 @article{wang2024multilingual,
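
Moving `XLMR_LANGUAGES` out of `e5_models.py` is a pure relocation: the new `from .facebookai import XLMR_LANGUAGES` line re-exposes the constant under its old module, so existing imports should keep working. A hedged check (untested sketch, assuming the package layout shown in RECORD below):

```python
# Both import paths are expected to resolve to the same list object
# after this change, since one module re-exports the other's binding.
from mteb.models.model_implementations.e5_models import XLMR_LANGUAGES as via_e5
from mteb.models.model_implementations.facebookai import XLMR_LANGUAGES as via_fb

assert via_e5 is via_fb
assert "dan-Latn" in via_fb
```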
mteb/models/model_implementations/facebookai.py ADDED
@@ -0,0 +1,147 @@
+from mteb.models import sentence_transformers_loader
+from mteb.models.model_meta import ModelMeta, ScoringFunction
+
+XLMR_LANGUAGES = [
+    "afr-Latn",
+    "amh-Latn",
+    "ara-Latn",
+    "asm-Latn",
+    "aze-Latn",
+    "bel-Latn",
+    "bul-Latn",
+    "ben-Latn",
+    "ben-Beng",
+    "bre-Latn",
+    "bos-Latn",
+    "cat-Latn",
+    "ces-Latn",
+    "cym-Latn",
+    "dan-Latn",
+    "deu-Latn",
+    "ell-Latn",
+    "eng-Latn",
+    "epo-Latn",
+    "spa-Latn",
+    "est-Latn",
+    "eus-Latn",
+    "fas-Latn",
+    "fin-Latn",
+    "fra-Latn",
+    "fry-Latn",
+    "gle-Latn",
+    "gla-Latn",
+    "glg-Latn",
+    "guj-Latn",
+    "hau-Latn",
+    "heb-Latn",
+    "hin-Latn",
+    "hin-Deva",
+    "hrv-Latn",
+    "hun-Latn",
+    "hye-Latn",
+    "ind-Latn",
+    "isl-Latn",
+    "ita-Latn",
+    "jpn-Latn",
+    "jav-Latn",
+    "kat-Latn",
+    "kaz-Latn",
+    "khm-Latn",
+    "kan-Latn",
+    "kor-Latn",
+    "kur-Latn",
+    "kir-Latn",
+    "lat-Latn",
+    "lao-Latn",
+    "lit-Latn",
+    "lav-Latn",
+    "mlg-Latn",
+    "mkd-Latn",
+    "mal-Latn",
+    "mon-Latn",
+    "mar-Latn",
+    "msa-Latn",
+    "mya-Latn",
+    "nep-Latn",
+    "nld-Latn",
+    "nob-Latn",
+    "orm-Latn",
+    "ori-Latn",
+    "pan-Latn",
+    "pol-Latn",
+    "pus-Latn",
+    "por-Latn",
+    "ron-Latn",
+    "rus-Latn",
+    "san-Latn",
+    "snd-Latn",
+    "sin-Latn",
+    "slk-Latn",
+    "slv-Latn",
+    "som-Latn",
+    "sqi-Latn",
+    "srp-Latn",
+    "sun-Latn",
+    "swe-Latn",
+    "swa-Latn",
+    "tam-Latn",
+    "tam-Taml",
+    "tel-Latn",
+    "tel-Telu",
+    "tha-Latn",
+    "tgl-Latn",
+    "tur-Latn",
+    "uig-Latn",
+    "ukr-Latn",
+    "urd-Latn",
+    "urd-Arab",
+    "uzb-Latn",
+    "vie-Latn",
+    "xho-Latn",
+    "yid-Latn",
+    "zho-Hant",
+    "zho-Hans",
+]
+
+
+xlmr_base = ModelMeta(
+    loader=sentence_transformers_loader,  # type: ignore[arg-type]
+    name="FacebookAI/xlm-roberta-base",
+    languages=XLMR_LANGUAGES,
+    open_weights=True,
+    revision="e73636d4f797dec63c3081bb6ed5c7b0bb3f2089",
+    release_date="2019-11-05",  # arxiv paper release
+    n_parameters=278043648,
+    memory_usage_mb=1064,
+    embed_dim=768,
+    license="mit",
+    max_tokens=512,
+    reference="https://huggingface.co/FacebookAI/xlm-roberta-base",
+    similarity_fn_name=ScoringFunction.COSINE,
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=False,
+    public_training_code=None,
+    public_training_data=None,
+    training_datasets=set(),
+)
+
+xlmr_large = ModelMeta(
+    loader=sentence_transformers_loader,  # type: ignore[arg-type]
+    name="FacebookAI/xlm-roberta-large",
+    languages=XLMR_LANGUAGES,
+    open_weights=True,
+    revision="c23d21b0620b635a76227c604d44e43a9f0ee389",
+    release_date="2019-11-05",  # arxiv paper release
+    n_parameters=559890432,
+    memory_usage_mb=2141,
+    embed_dim=1024,
+    license="mit",
+    max_tokens=512,
+    reference="https://huggingface.co/FacebookAI/xlm-roberta-large",
+    similarity_fn_name=ScoringFunction.COSINE,
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=False,
+    public_training_code=None,
+    public_training_data=None,
+    training_datasets=set(),
+)
mteb/models/model_implementations/kblab.py ADDED
@@ -0,0 +1,24 @@
+from mteb.models import sentence_transformers_loader
+from mteb.models.model_meta import ModelMeta, ScoringFunction
+
+sbert_swedish = ModelMeta(
+    loader=sentence_transformers_loader,  # type: ignore[arg-type]
+    name="KBLab/sentence-bert-swedish-cased",
+    languages=["swe-Latn"],
+    open_weights=True,
+    revision="6b5e83cd29c03729cfdc33d13b1423399b0efb5c",
+    release_date="2023-01-11",
+    n_parameters=124690944,
+    memory_usage_mb=476,
+    embed_dim=768,
+    license="apache-2.0",
+    max_tokens=384,
+    reference="https://huggingface.co/KBLab/sentence-bert-swedish-cased",
+    similarity_fn_name=ScoringFunction.COSINE,
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=False,
+    public_training_code=None,
+    public_training_data=None,
+    training_datasets=None,
+    adapted_from="sentence-transformers/all-mpnet-base-v2",
+)
mteb/models/model_implementations/kfst.py ADDED
@@ -0,0 +1,24 @@
+from mteb.models import sentence_transformers_loader
+from mteb.models.model_meta import ModelMeta, ScoringFunction
+
+xlmr_scandi = ModelMeta(
+    loader=sentence_transformers_loader,  # type: ignore[arg-type]
+    name="KFST/XLMRoberta-en-da-sv-nb",
+    languages=["swe-Latn", "nob-Latn", "nno-Latn", "dan-Latn", "eng-Latn"],
+    open_weights=True,
+    revision="d40c10ca7b1e68b5a8372f2d112dac9eb3279df1",
+    release_date="2022-02-22",
+    n_parameters=278043648,
+    memory_usage_mb=1061,
+    embed_dim=768,
+    license="not specified",
+    max_tokens=512,
+    reference="https://huggingface.co/KFST/XLMRoberta-en-da-sv-nb",
+    similarity_fn_name=ScoringFunction.COSINE,
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=False,
+    public_training_code=None,
+    public_training_data=None,
+    training_datasets=None,
+    adapted_from="FacebookAI/xlm-roberta-base",
+)
mteb/models/model_implementations/pawan_models.py ADDED
@@ -0,0 +1,38 @@
+from mteb.models.model_meta import (
+    ModelMeta,
+    ScoringFunction,
+)
+from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
+
+PAWAN_EMBD_CITATION = """@misc{medhi2025pawanembd,
+  title={PawanEmbd-68M: Distilled Embedding Model},
+  author={Medhi, D.},
+  year={2025},
+  url={https://huggingface.co/dmedhi/PawanEmbd-68M}
+}"""
+
+pawan_embd_68m = ModelMeta(
+    loader=sentence_transformers_loader,
+    name="dmedhi/PawanEmbd-68M",
+    languages=["eng-Latn"],
+    open_weights=True,
+    revision="32f295145802bdbd65699ad65fd27d2a5b69a909",
+    release_date="2025-12-08",
+    n_parameters=68_000_000,
+    memory_usage_mb=260,
+    embed_dim=768,
+    license="apache-2.0",
+    max_tokens=512,
+    reference="https://huggingface.co/dmedhi/PawanEmbd-68M",
+    similarity_fn_name=ScoringFunction.COSINE,
+    framework=["Sentence Transformers", "PyTorch"],
+    adapted_from="ibm-granite/granite-embedding-278m-multilingual",
+    superseded_by=None,
+    public_training_code=None,
+    public_training_data=None,
+    use_instructions=False,
+    training_datasets={
+        "AllNLI",
+    },
+    citation=PAWAN_EMBD_CITATION,
+)
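
These new files are static `ModelMeta` registrations; once shipped, the metadata should be retrievable by model name. A hedged lookup sketch (mteb exposes `get_model_meta`, though the exact accessor surface can vary across versions):

```python
import mteb

# Look up one of the newly registered models by its HF-style name.
meta = mteb.get_model_meta("KBLab/sentence-bert-swedish-cased")
print(meta.languages)     # expected: ["swe-Latn"]
print(meta.n_parameters)  # expected: 124690944
```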
mteb/results/benchmark_results.py CHANGED
@@ -296,7 +296,7 @@ class BenchmarkResults(BaseModel):

     def to_dataframe(
         self,
-        aggregation_level: Literal["subset", "split", "task"] = "task",
+        aggregation_level: Literal["subset", "split", "task", "language"] = "task",
         aggregation_fn: Callable[[list[Score]], Any] | None = None,
         include_model_revision: bool = False,
         format: Literal["wide", "long"] = "wide",
@@ -321,6 +321,7 @@ class BenchmarkResults(BaseModel):
                 - "subset"/None: No aggregation will be done. The DataFrame will have one row per model, task, split and subset.
                 - "split": Aggregates the scores by split. The DataFrame will have one row per model, task and split.
                 - "task": Aggregates the scores by task. The DataFrame will have one row per model and task.
+                - "language": Aggregates the scores by language. The DataFrame will have one row per model and language.
             aggregation_fn: The function to use for aggregation. If None, the mean will be used.
             include_model_revision: If True, the model revision will be included in the DataFrame. If False, it will be excluded.
                 If there are multiple revisions for the same model, they will be joined using the `join_revisions` method.
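
With the new aggregation level, per-language scores come straight out of `to_dataframe`. A hedged usage sketch (`results` is assumed to be an already-loaded `BenchmarkResults` instance):

```python
# Long format: one row per (model_name, language), with the score
# averaged across every task/split/subset covering that language.
df = results.to_dataframe(aggregation_level="language", format="long")
print(df.columns.tolist())  # roughly: ["model_name", "language", "score"]
```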
mteb/results/model_result.py CHANGED
@@ -30,7 +30,7 @@ logger = logging.getLogger(__name__)
 def _aggregate_and_pivot(
     df: pd.DataFrame,
     columns: list[str],
-    aggregation_level: Literal["subset", "split", "task"],
+    aggregation_level: Literal["subset", "split", "task", "language"],
     format: Literal["wide", "long"],
     aggregation_fn: Callable[[list[Score]], Any] | None,
 ) -> pd.DataFrame:
@@ -43,6 +43,12 @@ def _aggregate_and_pivot(
     elif aggregation_level == "task":
         index_columns = ["task_name"]

+    elif aggregation_level == "language":
+        index_columns = ["language"]
+        df = df.explode("language").reset_index(
+            drop=True
+        )  # each language in its own row before aggregation
+
     # perform aggregation
     if aggregation_fn is None:
         aggregation_fn = np.mean
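
The `explode` step is what makes language aggregation work on rows whose `language` cell is a list (one task can cover several languages). A small, self-contained demonstration, with the caveat that a multilingual task's score is then counted once per language it covers:

```python
import pandas as pd

df = pd.DataFrame(
    {
        "task_name": ["toy_bitext_task"],        # illustrative name
        "language": [["dan-Latn", "eng-Latn"]],  # list-valued cell
        "score": [0.42],
    }
)
print(df.explode("language").reset_index(drop=True))
#          task_name  language  score
# 0  toy_bitext_task  dan-Latn   0.42
# 1  toy_bitext_task  eng-Latn   0.42
```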
@@ -227,7 +233,7 @@ class ModelResult(BaseModel):
         )
         return entries

-    def _get_score_for_table(self) -> list[dict[str, str | float]]:
+    def _get_score_for_table(self) -> list[dict[str, str | float | list[str]]]:
         scores_data = []
         model_name = self.model_name
         for task_result in self.task_results:
@@ -239,10 +245,10 @@ class ModelResult(BaseModel):
                     "model_revision": self.model_revision,
                     "task_name": task_name,
                     "split": split,
+                    "language": score_item.get("languages", ["Unknown"]),
                     "subset": score_item.get("hf_subset", "default"),
                     "score": score_item.get("main_score", None),
                 }
-
                 scores_data.append(row)

         return scores_data
mteb-2.3.8.dist-info/METADATA → mteb-2.3.10.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mteb
-Version: 2.3.8
+Version: 2.3.10
 Summary: Massive Text Embedding Benchmark
 Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
 Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
mteb-2.3.8.dist-info/RECORD → mteb-2.3.10.dist-info/RECORD RENAMED
@@ -56,11 +56,11 @@ mteb/abstasks/text/bitext_mining.py,sha256=8m86XHJ3TxguC9itxZRq2Bt_p0NYojojS2Btk
 mteb/abstasks/text/reranking.py,sha256=rfRGRBeSjZLgkh8pneMgRm-vd9NHr5jSFH92YfOHfmU,7776
 mteb/abstasks/text/summarization.py,sha256=KYEb8gh4JjpSsrvGUmQ2VlrVdzzVxIWcitXOJUaHhO4,6954
 mteb/benchmarks/__init__.py,sha256=MQEVeli-zLaJ7Xg0z7RhXQwsdmm7Ht_W2Ln0rZo1Szc,225
-mteb/benchmarks/_create_table.py,sha256=OAiR44ynJ2fMzoBmVITQtOTYQzxIu9KUdS_HzlBlAck,20195
-mteb/benchmarks/benchmark.py,sha256=70RlMyyg_wkWTlU_IbfLl-KaqRWXGCKTd8fWe9X-AQE,4173
+mteb/benchmarks/_create_table.py,sha256=b2RqGqi0ZonKbHecEcZiF4pkfE96smFRIzxOI82ETA8,22304
+mteb/benchmarks/benchmark.py,sha256=UEllUtZQ0L10SNnxRyKbiv4wLCMcNF2nUPhBDKY3nz8,5097
 mteb/benchmarks/get_benchmark.py,sha256=-n_O-gitRKZi48gJKNgGuI36hsP7yLVSiwulnMHN7Gw,3935
 mteb/benchmarks/benchmarks/__init__.py,sha256=Ig5dSFunzI-F-OamruuKJVSstbG3xQNkXCxRY3Bj_Ck,2180
-mteb/benchmarks/benchmarks/benchmarks.py,sha256=vWX6QZgqF9iKAE1tIQwaXw9f8q_WiBtdgo8yj4_CHFI,94767
+mteb/benchmarks/benchmarks/benchmarks.py,sha256=mZQ56KBQwnBj2qLSQFOv39Av0HBNpH9HXYsDoFmqvu4,95640
 mteb/benchmarks/benchmarks/rteb_benchmarks.py,sha256=QnCSrTTaBfcRlAQp2Nu81tgv1idMXqiM16Fp2zKJ5Ys,10607
 mteb/cli/__init__.py,sha256=v-csUr3eUZElIvrGB6QGtaIdndDfNWEe9oZchsGsJpg,64
 mteb/cli/_display_tasks.py,sha256=7A06dT9sSoTz6shyMvskPxuc5eHY_H7PGPlROzMP0yw,2196
@@ -1430,10 +1430,10 @@ mteb/languages/language_family.json,sha256=OUGcHeOIPcZPb2FWmYLhxTS0JxjK5y3Fo6x0P
 mteb/languages/language_scripts.py,sha256=5wix9HTYolNIpTiS5oXf2pGJyL7ftdGKs_m432w81V8,3998
 mteb/languages/programming_languages.py,sha256=zxAakT3OSUnAuTnQ34VyeFIECnNXMlleZmAake6jsZE,211
 mteb/leaderboard/__init__.py,sha256=991roXmtRwEQysV-37hWEzWpkvPgMCGRqZTHR-hm2io,88
-mteb/leaderboard/app.py,sha256=29MxFLKEVT-roULHG5boHmsQVhld1rDGNS94r7MWlz8,33118
+mteb/leaderboard/app.py,sha256=-sBAkZ9JTr9czhsYEbSm92MfTmB8BOQ17WDkQ1dsP90,34282
 mteb/leaderboard/benchmark_selector.py,sha256=qd-2L20RQ4ACke01UlytkhZok1dkWgfUlXzfET52kGc,7956
 mteb/leaderboard/figures.py,sha256=mPO0go_23QEhAm1RJdLiBxPFCoUiA74_ztyl6yimc7k,7553
-mteb/leaderboard/table.py,sha256=6SnrYC5GcBlvVSO6vOk6ObuqtoveBLv3JUuXqdKueG8,8333
+mteb/leaderboard/table.py,sha256=NxXAUkQRWtxjJwfIiO9yvdvw9do3ogzqmAn6az01SSc,10609
 mteb/leaderboard/text_segments.py,sha256=iMIkS04QQjPbT-SkU0x6fOcS8xRbUYevryu9HydipKM,6570
 mteb/models/__init__.py,sha256=ABTuoqiBjBtBWW3LYY7ItBHdylR6jWoy06HH0g6j6fU,910
 mteb/models/abs_encoder.py,sha256=m0JkRfRPMYadDgBR9eozRloI31ZSWkSzDFINpwbfLZk,16533
@@ -1477,7 +1477,7 @@ mteb/models/model_implementations/colsmol_models.py,sha256=O2M7Ksydh94M_Iax4KytH
 mteb/models/model_implementations/conan_models.py,sha256=G-s7xo9VtNX-f7lWKtYVGHHiMMN0Xp44PlNIp7E0LAo,6502
 mteb/models/model_implementations/dino_models.py,sha256=QFgaFHR5YKrylqJGSljXCBn2W7qHhmF6KdXkvHrQNEI,16380
 mteb/models/model_implementations/e5_instruct.py,sha256=9R4GoSFicgqNDCh3HhTN_8L1qhzuEKvatjHYn3T9zlU,7676
-mteb/models/model_implementations/e5_models.py,sha256=vsqkmm6XzZn9ROj_OUR0j2KiN75MEuQsOPeoyc1AeYg,10937
+mteb/models/model_implementations/e5_models.py,sha256=ZLRgzx2uEBc_yWY6DwcJFUNKG6RHpWSEVp1_jaEURhs,9373
 mteb/models/model_implementations/e5_v.py,sha256=_9W7I0ryIzx_H9eCkzwdm8iHdGX1LIjKGXkhSh_zNv8,6690
 mteb/models/model_implementations/eagerworks_models.py,sha256=NOQkCUqn9jLSpf9p6KyaIHnJxYV1MNlr2z7hO2AcRSc,5744
 mteb/models/model_implementations/emillykkejensen_models.py,sha256=QdhGqCm_1-AURkrniZj2S1MjwwIVOPMzLvpgfJq-3EQ,2779
@@ -1485,6 +1485,7 @@ mteb/models/model_implementations/en_code_retriever.py,sha256=leZ-0M6LrunocY3XQB
 mteb/models/model_implementations/euler_models.py,sha256=fZoXYeDjSRN2Qj1Pf-ROi8xok03PjhYi4FLEZKjMPkk,905
 mteb/models/model_implementations/evaclip_models.py,sha256=cPMGYLDIq4s8zJxb4vPXqJ-rqwPaq7KOh2QZSO6cDas,8000
 mteb/models/model_implementations/fa_models.py,sha256=WGal70_ezITWoNdjcMdbOCTSCtoaXzuPadYstLVXxhg,7478
+mteb/models/model_implementations/facebookai.py,sha256=uhE6rB1YgxE0SIc7u8heE1U62qRFFA23IMgpjxBq_Ok,3116
 mteb/models/model_implementations/geogpt_models.py,sha256=Juv86SwhgQX80lVLjAFtim2aSiJT1AcgjniyyiKyk1Q,1923
 mteb/models/model_implementations/gme_v_models.py,sha256=NkfgR3_UdZzoBt1NnalVou6LOR-F7qXM4by9EbAVrys,13568
 mteb/models/model_implementations/google_models.py,sha256=7QfsaJ5JNDRQxFl7Zh2AtiR2PR7PZcfeCBgviuOFBCo,9130
@@ -1499,7 +1500,9 @@ mteb/models/model_implementations/jasper_models.py,sha256=ZY7qRRpBpD3eVryQb4rLs5
 mteb/models/model_implementations/jina_clip.py,sha256=CfiIxbhKspjQajNtObCfGPHOWPk6uLn4cuwydQHFTMo,5118
 mteb/models/model_implementations/jina_models.py,sha256=HrHm2Io3g9gHwxU5icAaudy_E8rAVkAAIFSzVYWF-dM,34859
 mteb/models/model_implementations/kalm_models.py,sha256=FmW7Z5Qs6WYBLuKvql3u4IJW36kj4k-Ypah8qTBEBkg,59837
+mteb/models/model_implementations/kblab.py,sha256=DDh8gDEI6YPjS4_yGYWC4HatE0mFf7vhGDU83zzV7V0,866
 mteb/models/model_implementations/kennethenevoldsen_models.py,sha256=DF-9nmsewYO9ikZ0kV81ujKGr7Ot36-9iPoxN7KX2mY,2993
+mteb/models/model_implementations/kfst.py,sha256=BQj0fxMJwyA6NOdK26NDYVL3z2PW1_F-lTTVImxEWZQ,892
 mteb/models/model_implementations/kowshik24_models.py,sha256=HoQpybjhquK2XSnawlq0aiSWFI5M7l6N4DNY4MQ-P10,976
 mteb/models/model_implementations/lens_models.py,sha256=fC7_NB1F8vBAlXD0p0-hALf6eZTPFJwpz57dy71OlwI,1696
 mteb/models/model_implementations/lgai_embedding_models.py,sha256=S83pbfkMH3YUNl4skusgbK-Rn-uLuScQVxgXwegR_N4,2333
@@ -1526,6 +1529,7 @@ mteb/models/model_implementations/openclip_models.py,sha256=W8XcokgLU1nSmMaWpYXk
 mteb/models/model_implementations/opensearch_neural_sparse_models.py,sha256=fuxIjOx_kPoDps5C7LW3JllG-AZj4ktqeTNgJESHZh4,8351
 mteb/models/model_implementations/ops_moa_models.py,sha256=luWw1j2iTMx1z1ydLCjvCI89E9Yvge7ruEawivJTmfE,2413
 mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py,sha256=qGXv71qRjNCIFluZOwvfBlFlKKyN2bXBokwUPk4KHmM,1066
+mteb/models/model_implementations/pawan_models.py,sha256=rV2ePGIuYroocvwqDXm4VU369Y_Vr67CyAE-08K5B9c,1151
 mteb/models/model_implementations/piccolo_models.py,sha256=d8Dtkv_ZTUOCmJLLOuwquq-gX-2UfKvAtl_LvAS0Xi0,2113
 mteb/models/model_implementations/promptriever_models.py,sha256=S7uWes_P74p3OZR_KBJHJN_ezlvvRx2__46DMCWqV5M,6328
 mteb/models/model_implementations/pylate_models.py,sha256=yINGQL97S4xjj74-FTWpO4KHX-E9NDOEeyQWyRmmnaE,14772
@@ -1573,8 +1577,8 @@ mteb/models/search_encoder_index/search_backend_protocol.py,sha256=TSjlx88stJcMl
 mteb/models/search_encoder_index/search_indexes/__init__.py,sha256=Wm60_oUemUpFsvrCMW111dcPH2L2rt1iZrXMskXmG7o,88
 mteb/models/search_encoder_index/search_indexes/faiss_search_index.py,sha256=WMs3QbbYV13fRuT3dakmdVMZLFdc_9ZzSupS3QxlbVQ,5555
 mteb/results/__init__.py,sha256=EXQqK4Am5eIYzD52dpcGAFSdqnC38oE6JHN302oidHc,158
-mteb/results/benchmark_results.py,sha256=OWqeBxbNsPmOKRhxY980N5CikpdJXToDGJGTXUe64Lw,18209
-mteb/results/model_result.py,sha256=pTyGFTLg6l1wmc3Ul1CJK6ESBqWJAuU4aeT8iFygAdU,13746
+mteb/results/benchmark_results.py,sha256=b_g0QmTbwue9ZpWTtyPfgf_nyavckZHUgTVE6zqqtzM,18342
+mteb/results/model_result.py,sha256=Y6b_xfJlw8EFZq464ZVhyw0Rryv111hvMjnXbEZJpXk,14059
 mteb/results/task_result.py,sha256=DgmAw6akotjp8m8E6gE8QP9mQMxUvyzu1hnZ5o01GkU,32303
 mteb/tasks/__init__.py,sha256=izAxU0ip1F_YUwx0dFCuN35BaktdmePh6vlDiHC0kLo,503
 mteb/tasks/aggregated_tasks/__init__.py,sha256=Ufgbh1AirxCQkojO3AUhUFWM8zQG10cfdVTkj_PeyLI,104
@@ -2578,9 +2582,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
 mteb/types/_result.py,sha256=CRAUc5IvqI3_9SyXDwv-PWLCXwXdZem9RePeYESRtuw,996
 mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
 mteb/types/statistics.py,sha256=YwJsxTf1eaCI_RE-J37a-gK5wDeGAsmkeZKoZCFihSo,3755
-mteb-2.3.8.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-mteb-2.3.8.dist-info/METADATA,sha256=QMpRmhMLXi45L0d29kCoNcEugCwDl8IWCc3wE_r-fb4,13923
-mteb-2.3.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-mteb-2.3.8.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
-mteb-2.3.8.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
-mteb-2.3.8.dist-info/RECORD,,
+mteb-2.3.10.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+mteb-2.3.10.dist-info/METADATA,sha256=IPpkXC-YeiZU0BtiAnv-e9aS8X99_uAsGYxCCIz7nr4,13924
+mteb-2.3.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+mteb-2.3.10.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
+mteb-2.3.10.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
+mteb-2.3.10.dist-info/RECORD,,