mteb 2.1.4__py3-none-any.whl → 2.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458)
  1. mteb/__init__.py +4 -0
  2. mteb/_create_dataloaders.py +6 -3
  3. mteb/_evaluators/any_sts_evaluator.py +21 -12
  4. mteb/_evaluators/classification_metrics.py +54 -0
  5. mteb/_evaluators/clustering_evaluator.py +1 -1
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +9 -4
  7. mteb/_evaluators/pair_classification_evaluator.py +30 -38
  8. mteb/_evaluators/sklearn_evaluator.py +15 -28
  9. mteb/_evaluators/text/bitext_mining_evaluator.py +4 -1
  10. mteb/_evaluators/text/summarization_evaluator.py +4 -2
  11. mteb/_evaluators/zeroshot_classification_evaluator.py +2 -2
  12. mteb/abstasks/_data_filter/__init__.py +0 -0
  13. mteb/abstasks/_data_filter/filters.py +125 -0
  14. mteb/abstasks/_data_filter/task_pipelines.py +102 -0
  15. mteb/abstasks/_statistics_calculation.py +6 -2
  16. mteb/abstasks/classification.py +0 -2
  17. mteb/abstasks/clustering.py +1 -1
  18. mteb/abstasks/clustering_legacy.py +3 -0
  19. mteb/abstasks/multilabel_classification.py +10 -3
  20. mteb/abstasks/pair_classification.py +8 -1
  21. mteb/abstasks/sts.py +7 -0
  22. mteb/abstasks/task_metadata.py +1 -0
  23. mteb/benchmarks/_create_table.py +84 -37
  24. mteb/benchmarks/benchmark.py +74 -15
  25. mteb/benchmarks/benchmarks/__init__.py +8 -0
  26. mteb/benchmarks/benchmarks/benchmarks.py +259 -15
  27. mteb/benchmarks/get_benchmark.py +2 -0
  28. mteb/cache.py +47 -10
  29. mteb/deprecated_evaluator.py +8 -13
  30. mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
  31. mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
  32. mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
  33. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
  34. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
  35. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
  36. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
  37. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
  38. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
  39. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
  40. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
  41. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
  42. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
  43. mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
  44. mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
  45. mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
  46. mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
  47. mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
  48. mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
  49. mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
  50. mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
  51. mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
  52. mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
  53. mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
  54. mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
  55. mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
  56. mteb/evaluate.py +65 -45
  57. mteb/leaderboard/app.py +268 -133
  58. mteb/leaderboard/benchmark_selector.py +14 -5
  59. mteb/leaderboard/figures.py +13 -15
  60. mteb/leaderboard/table.py +82 -17
  61. mteb/models/__init__.py +4 -1
  62. mteb/models/abs_encoder.py +21 -17
  63. mteb/models/cache_wrappers/__init__.py +2 -1
  64. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +2 -2
  65. mteb/models/cache_wrappers/cache_wrapper.py +1 -1
  66. mteb/models/get_model_meta.py +3 -114
  67. mteb/models/instruct_wrapper.py +5 -1
  68. mteb/models/model_implementations/align_models.py +7 -0
  69. mteb/models/model_implementations/amazon_models.py +1 -0
  70. mteb/models/model_implementations/andersborges.py +65 -0
  71. mteb/models/model_implementations/ara_models.py +8 -0
  72. mteb/models/model_implementations/arctic_models.py +8 -0
  73. mteb/models/model_implementations/b1ade_models.py +1 -0
  74. mteb/models/model_implementations/bedrock_models.py +4 -0
  75. mteb/models/model_implementations/bge_models.py +60 -0
  76. mteb/models/model_implementations/bica_model.py +35 -0
  77. mteb/models/model_implementations/blip2_models.py +11 -0
  78. mteb/models/model_implementations/blip_models.py +27 -0
  79. mteb/models/model_implementations/bm25.py +1 -0
  80. mteb/models/model_implementations/bmretriever_models.py +4 -0
  81. mteb/models/model_implementations/cadet_models.py +9 -0
  82. mteb/models/model_implementations/cde_models.py +14 -0
  83. mteb/models/model_implementations/clip_models.py +3 -0
  84. mteb/models/model_implementations/clips_models.py +100 -0
  85. mteb/models/model_implementations/codefuse_models.py +162 -0
  86. mteb/models/model_implementations/codesage_models.py +15 -0
  87. mteb/models/model_implementations/cohere_models.py +8 -1
  88. mteb/models/model_implementations/cohere_v.py +5 -0
  89. mteb/models/model_implementations/colpali_models.py +14 -6
  90. mteb/models/model_implementations/colqwen_models.py +271 -1
  91. mteb/models/model_implementations/colsmol_models.py +2 -0
  92. mteb/models/model_implementations/conan_models.py +1 -0
  93. mteb/models/model_implementations/dino_models.py +171 -0
  94. mteb/models/model_implementations/e5_instruct.py +4 -0
  95. mteb/models/model_implementations/e5_models.py +12 -101
  96. mteb/models/model_implementations/e5_v.py +1 -0
  97. mteb/models/model_implementations/eagerworks_models.py +164 -0
  98. mteb/models/model_implementations/emillykkejensen_models.py +91 -0
  99. mteb/models/model_implementations/en_code_retriever.py +1 -0
  100. mteb/models/model_implementations/euler_models.py +32 -0
  101. mteb/models/model_implementations/evaclip_models.py +4 -0
  102. mteb/models/model_implementations/fa_models.py +58 -0
  103. mteb/models/model_implementations/facebookai.py +193 -0
  104. mteb/models/model_implementations/geogpt_models.py +1 -0
  105. mteb/models/model_implementations/gme_v_models.py +11 -5
  106. mteb/models/model_implementations/google_models.py +16 -5
  107. mteb/models/model_implementations/granite_vision_embedding_models.py +7 -2
  108. mteb/models/model_implementations/gritlm_models.py +2 -0
  109. mteb/models/model_implementations/gte_models.py +78 -0
  110. mteb/models/model_implementations/hinvec_models.py +1 -0
  111. mteb/models/model_implementations/human.py +1 -0
  112. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  113. mteb/models/model_implementations/inf_models.py +2 -0
  114. mteb/models/model_implementations/jasper_models.py +255 -2
  115. mteb/models/model_implementations/jina_clip.py +1 -0
  116. mteb/models/model_implementations/jina_models.py +209 -5
  117. mteb/models/model_implementations/kalm_models.py +203 -25
  118. mteb/models/model_implementations/kblab.py +31 -0
  119. mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
  120. mteb/models/model_implementations/kfst.py +25 -0
  121. mteb/models/model_implementations/kowshik24_models.py +32 -0
  122. mteb/models/model_implementations/lens_models.py +2 -0
  123. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  124. mteb/models/model_implementations/linq_models.py +3 -2
  125. mteb/models/model_implementations/listconranker.py +1 -1
  126. mteb/models/model_implementations/llm2clip_models.py +3 -0
  127. mteb/models/model_implementations/llm2vec_models.py +8 -0
  128. mteb/models/model_implementations/mcinext_models.py +3 -0
  129. mteb/models/model_implementations/mdbr_models.py +2 -0
  130. mteb/models/model_implementations/misc_models.py +362 -0
  131. mteb/models/model_implementations/mme5_models.py +1 -0
  132. mteb/models/model_implementations/moco_models.py +11 -0
  133. mteb/models/model_implementations/mod_models.py +191 -0
  134. mteb/models/model_implementations/model2vec_models.py +13 -0
  135. mteb/models/model_implementations/moka_models.py +3 -0
  136. mteb/models/model_implementations/mxbai_models.py +9 -0
  137. mteb/models/model_implementations/nbailab.py +70 -0
  138. mteb/models/model_implementations/no_instruct_sentence_models.py +1 -0
  139. mteb/models/model_implementations/nomic_models.py +156 -4
  140. mteb/models/model_implementations/nomic_models_vision.py +7 -2
  141. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +23 -16
  142. mteb/models/model_implementations/nvidia_models.py +4 -1
  143. mteb/models/model_implementations/octen_models.py +195 -0
  144. mteb/models/model_implementations/openai_models.py +20 -16
  145. mteb/models/model_implementations/openclip_models.py +24 -0
  146. mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -0
  147. mteb/models/model_implementations/ops_moa_models.py +4 -2
  148. mteb/models/model_implementations/pawan_models.py +39 -0
  149. mteb/models/model_implementations/piccolo_models.py +8 -0
  150. mteb/models/model_implementations/promptriever_models.py +8 -4
  151. mteb/models/model_implementations/pylate_models.py +37 -4
  152. mteb/models/model_implementations/qodo_models.py +2 -0
  153. mteb/models/model_implementations/qtack_models.py +1 -0
  154. mteb/models/model_implementations/qwen3_models.py +6 -3
  155. mteb/models/model_implementations/qzhou_models.py +3 -1
  156. mteb/models/model_implementations/random_baseline.py +16 -21
  157. mteb/models/model_implementations/rasgaard_models.py +34 -0
  158. mteb/models/model_implementations/reasonir_model.py +1 -0
  159. mteb/models/model_implementations/repllama_models.py +2 -0
  160. mteb/models/model_implementations/rerankers_custom.py +3 -3
  161. mteb/models/model_implementations/rerankers_monot5_based.py +14 -14
  162. mteb/models/model_implementations/richinfoai_models.py +1 -0
  163. mteb/models/model_implementations/ru_sentence_models.py +51 -0
  164. mteb/models/model_implementations/ruri_models.py +322 -0
  165. mteb/models/model_implementations/salesforce_models.py +3 -0
  166. mteb/models/model_implementations/samilpwc_models.py +1 -0
  167. mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
  168. mteb/models/model_implementations/searchmap_models.py +1 -0
  169. mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
  170. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +658 -0
  171. mteb/models/model_implementations/seed_models.py +1 -0
  172. mteb/models/model_implementations/sentence_transformers_models.py +57 -0
  173. mteb/models/model_implementations/shuu_model.py +32 -31
  174. mteb/models/model_implementations/siglip_models.py +10 -0
  175. mteb/models/model_implementations/sonar_models.py +1 -0
  176. mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
  177. mteb/models/model_implementations/stella_models.py +6 -0
  178. mteb/models/model_implementations/tarka_models.py +376 -0
  179. mteb/models/model_implementations/ua_sentence_models.py +10 -0
  180. mteb/models/model_implementations/uae_models.py +1 -0
  181. mteb/models/model_implementations/vdr_models.py +2 -0
  182. mteb/models/model_implementations/vi_vn_models.py +39 -0
  183. mteb/models/model_implementations/vista_models.py +2 -0
  184. mteb/models/model_implementations/vlm2vec_models.py +2 -0
  185. mteb/models/model_implementations/voyage_models.py +15 -0
  186. mteb/models/model_implementations/voyage_v.py +8 -2
  187. mteb/models/model_implementations/xyz_models.py +1 -0
  188. mteb/models/model_implementations/youtu_models.py +1 -0
  189. mteb/models/model_implementations/yuan_models.py +34 -0
  190. mteb/models/model_implementations/yuan_models_en.py +58 -0
  191. mteb/models/model_meta.py +442 -22
  192. mteb/models/search_encoder_index/__init__.py +7 -0
  193. mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
  194. mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
  195. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +157 -0
  196. mteb/models/search_wrappers.py +165 -48
  197. mteb/models/sentence_transformer_wrapper.py +2 -7
  198. mteb/results/benchmark_results.py +88 -47
  199. mteb/results/model_result.py +11 -4
  200. mteb/results/task_result.py +37 -19
  201. mteb/similarity_functions.py +49 -0
  202. mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
  203. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
  204. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
  205. mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
  206. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
  207. mteb/tasks/classification/ara/ajgt.py +1 -2
  208. mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
  209. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
  210. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
  211. mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
  212. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
  213. mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
  214. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
  215. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
  216. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
  217. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
  218. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
  219. mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
  220. mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
  221. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
  222. mteb/tasks/classification/dan/dk_hate_classification.py +1 -2
  223. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
  224. mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
  225. mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
  226. mteb/tasks/classification/eng/arxiv_classification.py +1 -2
  227. mteb/tasks/classification/eng/banking77_classification.py +1 -2
  228. mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
  229. mteb/tasks/classification/eng/emotion_classification.py +1 -2
  230. mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
  231. mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
  232. mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
  233. mteb/tasks/classification/eng/imdb_classification.py +1 -2
  234. mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
  235. mteb/tasks/classification/eng/news_classification.py +1 -2
  236. mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
  237. mteb/tasks/classification/eng/patent_classification.py +1 -2
  238. mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
  239. mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
  240. mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
  241. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
  242. mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
  243. mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
  244. mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
  245. mteb/tasks/classification/eng/ucf101_classification.py +1 -5
  246. mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
  247. mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
  248. mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
  249. mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
  250. mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
  251. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
  252. mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
  253. mteb/tasks/classification/est/estonian_valence.py +1 -2
  254. mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
  255. mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
  256. mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
  257. mteb/tasks/classification/fra/french_book_reviews.py +1 -2
  258. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
  259. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
  260. mteb/tasks/classification/heb/__init__.py +6 -1
  261. mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
  262. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
  263. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
  264. mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
  265. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
  266. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
  267. mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
  268. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
  269. mteb/tasks/classification/jpn/wrime_classification.py +1 -2
  270. mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
  271. mteb/tasks/classification/kor/klue_tc.py +1 -2
  272. mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
  273. mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
  274. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +1 -2
  275. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
  276. mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
  277. mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
  278. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
  279. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
  280. mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
  281. mteb/tasks/classification/multilingual/scala_classification.py +1 -2
  282. mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
  283. mteb/tasks/classification/mya/myanmar_news.py +1 -2
  284. mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
  285. mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
  286. mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
  287. mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
  288. mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
  289. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
  290. mteb/tasks/classification/nld/iconclass_classification.py +3 -0
  291. mteb/tasks/classification/nld/open_tender_classification.py +3 -0
  292. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
  293. mteb/tasks/classification/nob/no_rec_classification.py +1 -2
  294. mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
  295. mteb/tasks/classification/ory/odia_news_classification.py +1 -2
  296. mteb/tasks/classification/pol/polish_classification.py +3 -6
  297. mteb/tasks/classification/ron/moroco.py +1 -2
  298. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
  299. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
  300. mteb/tasks/classification/rus/georeview_classification.py +1 -2
  301. mteb/tasks/classification/rus/headline_classification.py +1 -2
  302. mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
  303. mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
  304. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
  305. mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
  306. mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
  307. mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
  308. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
  309. mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
  310. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
  311. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
  312. mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
  313. mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
  314. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
  315. mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
  316. mteb/tasks/classification/swe/dalaj_classification.py +1 -2
  317. mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
  318. mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
  319. mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
  320. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
  321. mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
  322. mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
  323. mteb/tasks/classification/tur/__init__.py +4 -0
  324. mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
  325. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
  326. mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
  327. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
  328. mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
  329. mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
  330. mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
  331. mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
  332. mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
  333. mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
  334. mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
  335. mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
  336. mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
  337. mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
  338. mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
  339. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
  340. mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
  341. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
  342. mteb/tasks/classification/zho/cmteb_classification.py +5 -10
  343. mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
  344. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
  345. mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
  346. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
  347. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
  348. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
  349. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
  350. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
  351. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
  352. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
  353. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
  354. mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
  355. mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
  356. mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
  357. mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
  358. mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
  359. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
  360. mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
  361. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
  362. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
  363. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
  364. mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
  365. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  366. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
  367. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
  368. mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
  369. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
  370. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
  371. mteb/tasks/pair_classification/rus/__init__.py +2 -2
  372. mteb/tasks/pair_classification/rus/terra.py +51 -25
  373. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
  374. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
  375. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
  376. mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
  377. mteb/tasks/reranking/jpn/__init__.py +9 -1
  378. mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
  379. mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
  380. mteb/tasks/reranking/multilingual/__init__.py +2 -0
  381. mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
  382. mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
  383. mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
  384. mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
  385. mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
  386. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
  387. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
  388. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
  389. mteb/tasks/retrieval/jpn/__init__.py +8 -0
  390. mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
  391. mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
  392. mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
  393. mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
  394. mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
  395. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
  396. mteb/tasks/retrieval/kor/__init__.py +2 -1
  397. mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
  398. mteb/tasks/retrieval/multilingual/__init__.py +22 -0
  399. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
  400. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
  401. mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
  402. mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
  403. mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
  404. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
  405. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
  406. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
  407. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +399 -0
  408. mteb/tasks/retrieval/nld/__init__.py +8 -4
  409. mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
  410. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
  411. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
  412. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
  413. mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
  414. mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
  415. mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
  416. mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
  417. mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
  418. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
  419. mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
  420. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +1 -5
  421. mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
  422. mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
  423. mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
  424. mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
  425. mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
  426. mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
  427. mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
  428. mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
  429. mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
  430. mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
  431. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +1 -5
  432. mteb/tasks/retrieval/vie/fevervn_retrieval.py +1 -7
  433. mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
  434. mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
  435. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +1 -6
  436. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +1 -5
  437. mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
  438. mteb/tasks/retrieval/vie/nqvn_retrieval.py +1 -5
  439. mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
  440. mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
  441. mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
  442. mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
  443. mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
  444. mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
  445. mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
  446. mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
  447. mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
  448. mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
  449. mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
  450. mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
  451. mteb/types/_encoder_io.py +7 -2
  452. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/METADATA +11 -5
  453. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/RECORD +457 -391
  454. mteb/models/model_implementations/nb_sbert.py +0 -25
  455. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/WHEEL +0 -0
  456. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/entry_points.txt +0 -0
  457. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/licenses/LICENSE +0 -0
  458. {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/top_level.txt +0 -0
--- a/mteb/results/benchmark_results.py
+++ b/mteb/results/benchmark_results.py
@@ -1,3 +1,4 @@
+import functools
 import json
 import logging
 import warnings
@@ -15,6 +16,7 @@ from mteb.abstasks.task_metadata import (
     TaskDomain,
     TaskType,
 )
+from mteb.benchmarks.benchmark import Benchmark
 from mteb.models import ModelMeta
 from mteb.models.get_model_meta import get_model_metas
 from mteb.types import (
@@ -31,6 +33,24 @@ from .model_result import ModelResult, _aggregate_and_pivot
 logger = logging.getLogger(__name__)


+# Global cache for model metas and version parsing
+@functools.lru_cache
+def _get_cached_model_metas() -> dict[str, str | None]:
+    """Cache model metas to avoid repeated calls."""
+    return {meta.name: meta.revision for meta in get_model_metas()}
+
+
+@functools.lru_cache(maxsize=10000)
+def _parse_version_cached(version_str: str | None) -> Version | None:
+    """Cache version parsing to avoid repeated parsing."""
+    if version_str is None:
+        return None
+    try:
+        return Version(version_str)
+    except (InvalidVersion, TypeError):
+        return None
+
+
 class BenchmarkResults(BaseModel):
     """Data class to hold the benchmark results of a model.

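The two cached helpers above are the core of the speedup: model metas are fetched once per process, and each distinct version string is parsed at most once. A minimal standalone sketch of the same `functools.lru_cache` pattern, assuming `Version`/`InvalidVersion` come from `packaging.version` as the names suggest (illustration only, not mteb code):

```python
import functools

from packaging.version import InvalidVersion, Version


@functools.lru_cache(maxsize=10000)
def parse_version_cached(version_str: str | None) -> Version | None:
    """Parse a version string once; repeated calls hit the cache."""
    if version_str is None:
        return None
    try:
        return Version(version_str)
    except (InvalidVersion, TypeError):
        return None


# Same argument -> same cached object, no re-parsing.
assert parse_version_cached("2.5.2") is parse_version_cached("2.5.2")
```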
@@ -39,10 +59,10 @@ class BenchmarkResults(BaseModel):
     """

     model_results: list[ModelResult]
-    model_config = (
-        ConfigDict(  # to free up the name model_results which is otherwise protected
-            protected_namespaces=(),
-        )
+    benchmark: Benchmark | None = None
+    model_config = ConfigDict(
+        protected_namespaces=(),  # to free up the name model_results which is otherwise protected
+        arbitrary_types_allowed=True,  # Benchmark is dataclasses.dataclass
     )

     def __repr__(self) -> str:
@@ -173,40 +193,6 @@ class BenchmarkResults(BaseModel):
         Returns:
             A new BenchmarkResults object with the revisions joined.
         """
-
-        def parse_version(version_str: str) -> Version | None:
-            try:
-                return Version(version_str)
-            except (InvalidVersion, TypeError):
-                return None
-
-        def keep_best(group: pd.DataFrame) -> pd.DataFrame:
-            # Filtering out task_results where no scores are present
-            group = group[group["has_scores"]]
-            is_main_revision = group["revision"] == group["main_revision"]
-            # If the main revision is present we select that
-            if is_main_revision.sum() > 0:
-                return group[is_main_revision].head(n=1)
-            unique_revisions = group["revision"].unique()
-
-            # ensure None/NA/"external" revisions is filtered out
-            group.loc[group["revision"].isna(), "revision"] = "no_revision_available"
-            group.loc[group["revision"] == "external", "revision"] = (
-                "no_revision_available"
-            )
-
-            # Filtering out no_revision_available if other revisions are present
-            if (len(unique_revisions) > 1) and (
-                "no_revision_available" in unique_revisions
-            ):
-                group = group[group["revision"] != "no_revision_available"]
-            # If there are any not-NA mteb versions, we select the latest one
-            if group["mteb_version"].notna().any():
-                group = group.dropna(subset=["mteb_version"])
-                group = group.sort_values("mteb_version", ascending=False)
-                return group.head(n=1)
-            return group.head(n=1)
-
         records = []
         for model_result in self:
             for task_result in model_result.task_results:
@@ -223,17 +209,54 @@ class BenchmarkResults(BaseModel):
         if not records:
             return BenchmarkResults.model_construct(model_results=[])
         task_df = pd.DataFrame.from_records(records)
-        model_to_main_revision = {
-            meta.name: meta.revision for meta in get_model_metas()
-        }
+
+        # Use cached model metas
+        model_to_main_revision = _get_cached_model_metas()
         task_df["main_revision"] = task_df["model"].map(model_to_main_revision)  # type: ignore
-        task_df["mteb_version"] = task_df["mteb_version"].map(parse_version)  # type: ignore
-        task_df = (
-            task_df.groupby(["model", "task_name"])
-            .apply(keep_best)
-            .reset_index(drop=True)
+
+        # Use cached version parsing
+        task_df["mteb_version"] = task_df["mteb_version"].map(_parse_version_cached)  # type: ignore
+
+        # Filter out rows without scores first
+        task_df = task_df[task_df["has_scores"]]
+
+        # Optimize groupby with vectorized operations
+        # Sort by priority: main_revision match, then mteb_version (descending), then revision
+        task_df["is_main_revision"] = task_df["revision"] == task_df["main_revision"]
+
+        # Handle None/NA/external revisions
+        task_df["revision_clean"] = task_df["revision"].copy()
+        task_df.loc[task_df["revision"].isna(), "revision_clean"] = (
+            "no_revision_available"
         )
+        task_df.loc[task_df["revision"] == "external", "revision_clean"] = (
+            "no_revision_available"
+        )
+
+        # Create a priority column for sorting
+        # Higher priority = better to keep
+        # Priority: main_revision (1000), has valid mteb_version (100), has valid revision (10)
+        task_df["priority"] = 0
+        task_df.loc[task_df["is_main_revision"], "priority"] += 1000
+        task_df.loc[task_df["mteb_version"].notna(), "priority"] += 100
+        task_df.loc[
+            task_df["revision_clean"] != "no_revision_available", "priority"
+        ] += 10
+
+        # Sort by priority (desc), mteb_version (desc), and take first per group
+        task_df = task_df.sort_values(
+            ["model", "task_name", "priority", "mteb_version"],
+            ascending=[True, True, False, False],
+            na_position="last",
+        )
+
+        task_df = task_df.groupby(["model", "task_name"], as_index=False).first()
+
+        # Reconstruct model results
         model_results = []
+        # Group by original revision to maintain deterministic behavior
+        # After the first() selection above, each (model, task_name) is unique,
+        # so grouping by original revision ensures consistent ModelResult creation
         for (model, model_revision), group in task_df.groupby(["model", "revision"]):
             model_result = ModelResult.model_construct(
                 model_name=model,
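The rewrite replaces the per-group `apply(keep_best)` (a Python-level loop over groups) with one vectorized pass: score every row with an additive priority, sort the whole frame once, then keep the first row per (model, task_name) group. A toy illustration of the sort-then-first pattern on made-up data:

```python
import pandas as pd

df = pd.DataFrame(
    {
        "model": ["m1", "m1", "m2"],
        "task_name": ["t", "t", "t"],
        "is_main_revision": [False, True, False],
        "score": [0.10, 0.20, 0.30],
    }
)

# Additive priority: a row matching the model's main revision outranks the rest.
df["priority"] = 0
df.loc[df["is_main_revision"], "priority"] += 1000

# One global sort; with no missing values, first() then returns the top row per group.
df = df.sort_values(["model", "task_name", "priority"], ascending=[True, True, False])
best = df.groupby(["model", "task_name"], as_index=False).first()
print(best[["model", "score"]])  # m1 keeps the main-revision row (0.20)
```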
@@ -296,7 +319,7 @@ class BenchmarkResults(BaseModel):

     def to_dataframe(
         self,
-        aggregation_level: Literal["subset", "split", "task"] = "task",
+        aggregation_level: Literal["subset", "split", "task", "language"] = "task",
         aggregation_fn: Callable[[list[Score]], Any] | None = None,
         include_model_revision: bool = False,
         format: Literal["wide", "long"] = "wide",
@@ -321,6 +344,7 @@ class BenchmarkResults(BaseModel):
                 - "subset"/None: No aggregation will be done. The DataFrame will have one row per model, task, split and subset.
                 - "split": Aggregates the scores by split. The DataFrame will have one row per model, task and split.
                 - "task": Aggregates the scores by task. The DataFrame will have one row per model and task.
+                - "language": Aggregates the scores by language. The DataFrame will have one row per model and language.
             aggregation_fn: The function to use for aggregation. If None, the mean will be used.
             include_model_revision: If True, the model revision will be included in the DataFrame. If False, it will be excluded.
                 If there are multiple revisions for the same model, they will be joined using the `join_revisions` method.
@@ -361,6 +385,23 @@ class BenchmarkResults(BaseModel):
             format=format,
         )

+    def get_benchmark_result(self) -> pd.DataFrame:
+        """Get aggregated scores for each model in the benchmark.
+
+        Uses the benchmark's summary table creation method to compute scores.
+
+        Returns:
+            A DataFrame with the aggregated benchmark scores for each model.
+        """
+        if self.benchmark is None:
+            raise ValueError(
+                "No benchmark associated with these results (self.benchmark is None). "
+                "To get benchmark results, load results with a Benchmark object. "
+                "`results = cache.load_results(tasks='MTEB(eng, v2)')`"
+            )
+
+        return self.benchmark._create_summary_table(self)
+
     def __iter__(self) -> Iterator[ModelResult]:
         return iter(self.model_results)

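Together with the new `benchmark` field, `get_benchmark_result` lets a loaded result set reproduce the benchmark's summary table directly. A hedged usage sketch: the import path follows the package layout in the file list, and the `cache.load_results(...)` call is quoted from the error message above, so the exact loading entry point may differ between releases.

```python
from mteb.results.benchmark_results import BenchmarkResults


def summarize(results: BenchmarkResults) -> None:
    # Works only if a Benchmark object was attached when the results were
    # loaded, e.g. results = cache.load_results(tasks="MTEB(eng, v2)");
    # otherwise get_benchmark_result() raises the ValueError defined above.
    summary = results.get_benchmark_result()
    print(summary.head())

    # The new aggregation level: one row per model and language.
    per_language = results.to_dataframe(aggregation_level="language")
    print(per_language.head())
```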
--- a/mteb/results/model_result.py
+++ b/mteb/results/model_result.py
@@ -22,7 +22,7 @@ from mteb.types import (
     SplitName,
 )

-from .task_result import TaskResult
+from .task_result import TaskError, TaskResult

 logger = logging.getLogger(__name__)

@@ -30,7 +30,7 @@ logger = logging.getLogger(__name__)
 def _aggregate_and_pivot(
     df: pd.DataFrame,
     columns: list[str],
-    aggregation_level: Literal["subset", "split", "task"],
+    aggregation_level: Literal["subset", "split", "task", "language"],
     format: Literal["wide", "long"],
     aggregation_fn: Callable[[list[Score]], Any] | None,
 ) -> pd.DataFrame:
@@ -43,6 +43,12 @@ def _aggregate_and_pivot(
     elif aggregation_level == "task":
         index_columns = ["task_name"]

+    elif aggregation_level == "language":
+        index_columns = ["language"]
+        df = df.explode("language").reset_index(
+            drop=True
+        )  # each language in its own row before aggregation
+
     # perform aggregation
     if aggregation_fn is None:
         aggregation_fn = np.mean
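The "language" level leans on `pandas.DataFrame.explode`: the list-valued `language` column becomes one row per language, so the existing aggregation machinery can group by it. A small demonstration:

```python
import pandas as pd

df = pd.DataFrame(
    {
        "task_name": ["T1", "T2"],
        "language": [["eng-Latn", "rus-Cyrl"], ["eng-Latn"]],
        "score": [0.8, 0.6],
    }
)

# One row per language; scores are repeated, so a later
# groupby("language")["score"].mean() averages across tasks per language.
exploded = df.explode("language").reset_index(drop=True)
print(exploded)
#   task_name  language  score
# 0        T1  eng-Latn    0.8
# 1        T1  rus-Cyrl    0.8
# 2        T2  eng-Latn    0.6
```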
@@ -82,6 +88,7 @@ class ModelResult(BaseModel):
             protected_namespaces=(),
         )
     )
+    exceptions: list[TaskError] | None = None

     def __repr__(self) -> str:
         n_entries = len(self.task_results)
@@ -226,7 +233,7 @@ class ModelResult(BaseModel):
             )
         return entries

-    def _get_score_for_table(self) -> list[dict[str, str | float]]:
+    def _get_score_for_table(self) -> list[dict[str, str | float | list[str]]]:
         scores_data = []
         model_name = self.model_name
         for task_result in self.task_results:
@@ -238,10 +245,10 @@ class ModelResult(BaseModel):
                     "model_revision": self.model_revision,
                     "task_name": task_name,
                     "split": split,
+                    "language": score_item.get("languages", ["Unknown"]),
                     "subset": score_item.get("hf_subset", "default"),
                     "score": score_item.get("main_score", None),
                 }
-
                 scores_data.append(row)

         return scores_data
--- a/mteb/results/task_result.py
+++ b/mteb/results/task_result.py
@@ -633,21 +633,23 @@ class TaskResult(BaseModel):
         task = get_task(self.task_name)

         splits = task.eval_splits
-        hf_subsets = task.hf_subsets
-        hf_subsets = set(hf_subsets)
+        hf_subsets = set(task.hf_subsets)  # Convert to set once

         new_scores = {}
         seen_splits = set()
         for split in self.scores:
             if split not in splits:
                 continue
-            new_scores[split] = []
             seen_subsets = set()
-            for _scores in self.scores[split]:
-                if _scores["hf_subset"] not in hf_subsets:
-                    continue
-                new_scores[split].append(_scores)
+            # Use list comprehension for better performance
+            new_scores[split] = [
+                _scores
+                for _scores in self.scores[split]
+                if _scores["hf_subset"] in hf_subsets
+            ]
+            for _scores in new_scores[split]:
                 seen_subsets.add(_scores["hf_subset"])
+
             if seen_subsets != hf_subsets:
                 missing_subsets = hf_subsets - seen_subsets
                 if len(missing_subsets) > 2:
@@ -664,9 +666,9 @@ class TaskResult(BaseModel):
             logger.warning(
                 f"{task.metadata.name}: Missing splits {set(splits) - seen_splits}"
             )
-        new_res = {**self.to_dict(), "scores": new_scores}
-        new_res = TaskResult.from_validated(**new_res)
-        return new_res
+        data = self.model_dump()
+        data["scores"] = new_scores
+        return type(self).model_construct(**data)

     def is_mergeable(
         self,
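The `to_dict`/`from_validated` round trip is replaced by pydantic v2's `model_dump` plus `model_construct`, which skips validation entirely; that is safe here because the scores being filtered were already validated once. The pattern in a standalone pydantic sketch (hypothetical model, not mteb's):

```python
from pydantic import BaseModel


class Result(BaseModel):
    task_name: str
    scores: dict[str, list[dict]]


r = Result(task_name="demo", scores={"test": [{"main_score": 0.5}]})

# Dump, mutate, and rebuild without re-running validators (fast, but it
# trusts the data, so only use it on values that were validated before).
data = r.model_dump()
data["scores"] = {"test": []}
filtered = type(r).model_construct(**data)
assert filtered.scores == {"test": []}
```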
@@ -698,27 +700,31 @@ class TaskResult(BaseModel):
             name = result.metadata.name
             revision = result.metadata.revision
         else:
+            msg = "result must be a TaskResult or AbsTask object"
+            if raise_error:
+                raise ValueError(msg)
+            logger.debug(msg)
             return False

         if self.task_name != name:
+            msg = f"Cannot merge TaskResult objects as they are derived from different tasks ({self.task_name} and {name})"
             if raise_error:
-                raise ValueError(
-                    f"Cannot merge TaskResult objects as they are derived from different tasks ({self.task_name} and {name})"
-                )
+                raise ValueError(msg)
+            logger.debug(msg)
             return False

         if Criteria.MTEB_VERSION in criteria and self.mteb_version != mteb_version:
+            msg = f"Cannot merge TaskResult objects as they are derived from different MTEB versions ({self.mteb_version} (loaded) and {mteb_version} (current))"
             if raise_error:
-                raise ValueError(
-                    f"Cannot merge TaskResult objects as they are derived from different MTEB versions ({self.mteb_version} and {mteb_version})"
-                )
+                raise ValueError(msg)
+            logger.debug(msg)
             return False

         if Criteria.DATASET_REVISION in criteria and self.dataset_revision != revision:
+            msg = f"Cannot merge TaskResult objects as they are derived from different dataset revisions ({self.dataset_revision} and {revision})"
             if raise_error:
-                raise ValueError(
-                    f"Cannot merge TaskResult objects as they are derived from different dataset revisions ({self.dataset_revision} and {revision})"
-                )
+                raise ValueError(msg)
+            logger.debug(msg)
             return False

         return True
@@ -836,3 +842,15 @@ class TaskResult(BaseModel):
                 )
             )
         return results
+
+
+class TaskError(BaseModel):
+    """A class to represent an error that occurred during the evaluation of a task.
+
+    Attributes:
+        task_name: The name of the MTEB task.
+        exception: The error message that occurred during the evaluation.
+    """
+
+    task_name: str
+    exception: str
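Paired with the new `ModelResult.exceptions: list[TaskError] | None` field, this lets an evaluation run carry its failures alongside its scores instead of aborting. A minimal sketch of recording one; the failing-task loop is hypothetical, while the import path matches the `from .task_result import TaskError` shown in model_result.py:

```python
from mteb.results.task_result import TaskError

errors: list[TaskError] = []
try:
    raise RuntimeError("CUDA out of memory")  # stand-in for a failing task
except Exception as err:
    # `exception` is a plain string, so format the exception yourself.
    errors.append(TaskError(task_name="Banking77Classification", exception=str(err)))

print(errors[0].task_name, "->", errors[0].exception)
```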
--- a/mteb/similarity_functions.py
+++ b/mteb/similarity_functions.py
@@ -1,6 +1,7 @@
 import torch

 from mteb.models import EncoderProtocol
+from mteb.models.model_meta import ScoringFunction
 from mteb.types import Array


@@ -38,6 +39,54 @@ def compute_pairwise_similarity(
     return pairwise_cos_sim(embedding1, embedding2)


+def select_similarity(
+    embedding1: Array,
+    embedding2: Array,
+    similarity_fn: ScoringFunction,
+) -> Array:
+    """Compute similarity between two sets of embeddings using the specified similarity function.
+
+    Args:
+        embedding1: The first set of embeddings.
+        embedding2: The second set of embeddings.
+        similarity_fn: The similarity function to use (COSINE, DOT_PRODUCT, EUCLIDEAN).
+
+    Returns:
+        Array: The computed similarity scores.
+    """
+    if similarity_fn is ScoringFunction.COSINE:
+        return cos_sim(embedding1, embedding2)
+    elif similarity_fn is ScoringFunction.DOT_PRODUCT:
+        return dot_score(embedding1, embedding2)
+    elif similarity_fn is ScoringFunction.EUCLIDEAN:
+        return euclidean_sim(embedding1, embedding2)
+    raise ValueError(f"Unsupported similarity function: {similarity_fn}")
+
+
+def select_pairwise_similarity(
+    embedding1: Array,
+    embedding2: Array,
+    similarity_fn: ScoringFunction,
+) -> Array:
+    """Compute pairwise similarity between two sets of embeddings using the specified similarity function.
+
+    Args:
+        embedding1: The first set of embeddings.
+        embedding2: The second set of embeddings.
+        similarity_fn: The similarity function to use (COSINE, DOT_PRODUCT, EUCLIDEAN).
+
+    Returns:
+        Array: The computed pairwise similarity scores.
+    """
+    if similarity_fn is ScoringFunction.COSINE:
+        return pairwise_cos_sim(embedding1, embedding2)
+    elif similarity_fn is ScoringFunction.DOT_PRODUCT:
+        return pairwise_dot_score(embedding1, embedding2)
+    elif similarity_fn is ScoringFunction.EUCLIDEAN:
+        return pairwise_euclidean_sim(embedding1, embedding2)
+    raise ValueError(f"Unsupported similarity function: {similarity_fn}")
+
+
 def _normalize_embeddings(embeddings: Array) -> torch.Tensor:
     """Normalizes the embeddings matrix, so that each sentence embedding has unit length.

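The two dispatchers map a `ScoringFunction` member onto the matching kernel: `select_similarity` returns the full n×m score matrix, while `select_pairwise_similarity` scores row i of one batch against row i of the other. A usage sketch assuming mteb ≥ 2.5.2, with the import paths shown in this diff; the shape comments follow the usual sentence-transformers-style semantics of these kernels:

```python
import torch

from mteb.models.model_meta import ScoringFunction
from mteb.similarity_functions import select_pairwise_similarity, select_similarity

queries = torch.randn(4, 8)
docs = torch.randn(6, 8)

all_pairs = select_similarity(queries, docs, ScoringFunction.COSINE)
print(all_pairs.shape)  # (4, 6): every query against every document

aligned = select_pairwise_similarity(queries, docs[:4], ScoringFunction.DOT_PRODUCT)
print(aligned.shape)  # (4,): query i against document i
```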
--- a/mteb/tasks/bitext_mining/multilingual/__init__.py
+++ b/mteb/tasks/bitext_mining/multilingual/__init__.py
@@ -16,7 +16,7 @@ from .nusa_translation_bitext_mining import NusaTranslationBitextMining
 from .nusa_x_bitext_mining import NusaXBitextMining
 from .phinc_bitext_mining import PhincBitextMining
 from .roma_tales_bitext_mining import RomaTalesBitextMining
-from .ru_sci_bench_bitext_mining import RuSciBenchBitextMining
+from .ru_sci_bench_bitext_mining import RuSciBenchBitextMining, RuSciBenchBitextMiningV2
 from .tatoeba_bitext_mining import TatoebaBitextMining
 from .web_faq_bitext_mining import WebFAQBitextMiningQAs, WebFAQBitextMiningQuestions

@@ -40,6 +40,7 @@ __all__ = [
     "PhincBitextMining",
     "RomaTalesBitextMining",
     "RuSciBenchBitextMining",
+    "RuSciBenchBitextMiningV2",
     "TatoebaBitextMining",
     "WebFAQBitextMiningQAs",
     "WebFAQBitextMiningQuestions",
--- a/mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py
+++ b/mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py
@@ -23,7 +23,7 @@ class BUCCBitextMining(AbsTaskBitextMining):
             "path": "mteb/BUCC",
             "revision": "414572247440f0ccacf7eb0bb70a31533a0e5443",
         },
-        description="BUCC bitext mining dataset",
+        description="BUCC bitext mining dataset train split.",
         reference="https://comparable.limsi.fr/bucc2018/bucc2018-task.html",
         type="BitextMining",
         category="t2t",
@@ -71,7 +71,9 @@ Rapp, Reinhard},

         sentence1 = data["sentence1"][0]
         sentence2 = data["sentence2"][0]
-        sentence1 = [sentence1[i] for (i, j) in gold]
+        sentence1 = [
+            sentence1[i] for (i, j) in gold
+        ]  # keep only sentences in gold. The 2nd value is meant for sentence2 but not used here. This is fixed in BUCC.v2.
         logger.info(f"Lang {lang} num gold {len(gold)}")
         logger.info(f"Lang {lang} num sentence1 {len(sentence1)}")
         logger.info(f"Lang {lang} num sentence2 {len(sentence2)}")
--- a/mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py
+++ b/mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py
@@ -20,7 +20,7 @@ class BUCCBitextMiningFast(AbsTaskBitextMining):
             "path": "mteb/bucc-bitext-mining",
             "revision": "1739dc11ffe9b7bfccd7f3d585aeb4c544fc6677",
         },
-        description="BUCC bitext mining dataset",
+        description="BUCC bitext mining dataset train split, gold set only.",
         reference="https://comparable.limsi.fr/bucc2018/bucc2018-task.html",
         type="BitextMining",
         category="t2t",
--- a/mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py
+++ b/mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py
@@ -10,11 +10,53 @@ class RuSciBenchBitextMining(AbsTaskBitextMining):
             "path": "mlsa-iai-msu-lab/ru_sci_bench_bitext_mining",
             "revision": "e5840033c5cf2573932db027ac8001fe0a7eb6fa",
         },
-        description="""This task focuses on finding translations of scientific articles.
-    The dataset is sourced from eLibrary, Russia's largest electronic library of scientific publications.
-    Russian authors often provide English translations for their abstracts and titles,
-    and the data consists of these paired titles and abstracts. The task evaluates a model's ability
-    to match an article's Russian title and abstract to its English counterpart, or vice versa.""",
+        description="This task focuses on finding translations of scientific articles. The dataset is sourced from eLibrary, Russia's largest electronic library of scientific publications. Russian authors often provide English translations for their abstracts and titles, and the data consists of these paired titles and abstracts. The task evaluates a model's ability to match an article's Russian title and abstract to its English counterpart, or vice versa.",
+        reference="https://github.com/mlsa-iai-msu-lab/ru_sci_bench_mteb",
+        type="BitextMining",
+        category="t2c",
+        modalities=["text"],
+        eval_splits=["test"],
+        eval_langs={
+            "ru-en": ["rus-Cyrl", "eng-Latn"],
+            "en-ru": ["eng-Latn", "rus-Cyrl"],
+        },
+        main_score="f1",
+        date=("2007-01-01", "2023-01-01"),
+        domains=["Academic", "Non-fiction", "Written"],
+        task_subtypes=[],
+        license="not specified",
+        dialect=[],
+        sample_creation="found",
+        annotations_creators="derived",
+        bibtex_citation=r"""
+@article{vatolin2024ruscibench,
+  author = {Vatolin, A. and Gerasimenko, N. and Ianina, A. and Vorontsov, K.},
+  doi = {10.1134/S1064562424602191},
+  issn = {1531-8362},
+  journal = {Doklady Mathematics},
+  month = {12},
+  number = {1},
+  pages = {S251--S260},
+  title = {RuSciBench: Open Benchmark for Russian and English Scientific Document Representations},
+  url = {https://doi.org/10.1134/S1064562424602191},
+  volume = {110},
+  year = {2024},
+}
+""",
+        prompt="Given the following title and abstract of the scientific article, find its translation",
+        superseded_by="RuSciBenchBitextMining.v2",
+    )
+
+
+class RuSciBenchBitextMiningV2(AbsTaskBitextMining):
+    fast_loading = True
+    metadata = TaskMetadata(
+        name="RuSciBenchBitextMining.v2",
+        dataset={
+            "path": "mlsa-iai-msu-lab/ru_sci_bench_bitext_mining",
+            "revision": "20e815e8ac8787331546386dfd177821510f79a3",
+        },
+        description="This task focuses on finding translations of scientific articles. The dataset is sourced from eLibrary, Russia's largest electronic library of scientific publications. Russian authors often provide English translations for their abstracts and titles, and the data consists of these paired titles and abstracts. The task evaluates a model's ability to match an article's Russian title and abstract to its English counterpart, or vice versa. Compared to the previous version, 6 erroneous examples have been removed.",
         reference="https://github.com/mlsa-iai-msu-lab/ru_sci_bench_mteb",
         type="BitextMining",
         category="t2c",
--- a/mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py
+++ b/mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py
@@ -198,9 +198,7 @@ _SPLITS = ["default"]
 class WebFAQBitextMiningQuestions(AbsTaskBitextMining):
     metadata = TaskMetadata(
         name="WebFAQBitextMiningQuestions",
-        description="""The WebFAQ Bitext Dataset consists of natural FAQ-style Question-Answer pairs that align across languages.
-A sentence in the "WebFAQBitextMiningQuestions" task is the question originating from an aligned QA.
-The dataset is sourced from FAQ pages on the web.""",
+        description='The WebFAQ Bitext Dataset consists of natural FAQ-style Question-Answer pairs that align across languages. A sentence in the "WebFAQBitextMiningQuestions" task is the question originating from an aligned QA. The dataset is sourced from FAQ pages on the web.',
         reference="https://huggingface.co/PaDaS-Lab",
         dataset={
             "path": "PaDaS-Lab/webfaq-bitexts",
@@ -254,9 +252,7 @@ The dataset is sourced from FAQ pages on the web.""",
 class WebFAQBitextMiningQAs(AbsTaskBitextMining):
     metadata = TaskMetadata(
         name="WebFAQBitextMiningQAs",
-        description="""The WebFAQ Bitext Dataset consists of natural FAQ-style Question-Answer pairs that align across languages.
-A sentence in the "WebFAQBitextMiningQAs" task is a concatenation of a question and its corresponding answer.
-The dataset is sourced from FAQ pages on the web.""",
+        description='The WebFAQ Bitext Dataset consists of natural FAQ-style Question-Answer pairs that align across languages. A sentence in the "WebFAQBitextMiningQAs" task is a concatenation of a question and its corresponding answer. The dataset is sourced from FAQ pages on the web.',
         reference="https://huggingface.co/PaDaS-Lab",
         dataset={
             "path": "PaDaS-Lab/webfaq-bitexts",
--- a/mteb/tasks/classification/ara/ajgt.py
+++ b/mteb/tasks/classification/ara/ajgt.py
@@ -45,8 +45,7 @@ class AJGTV2(AbsTaskClassification):
             "path": "mteb/ajgt",
             "revision": "0a3dea7301ee0c051891f04d32f3e8577a9eae36",
         },
-        description="""Arabic Jordanian General Tweets (AJGT) Corpus consisted of 1,800 tweets (900 for training and 900 for testing) annotated as positive and negative. Modern Standard Arabic (MSA) or Jordanian dialect.
-    This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)""",
+        description="Arabic Jordanian General Tweets (AJGT) Corpus consisted of 1,800 tweets (900 for training and 900 for testing) annotated as positive and negative. Modern Standard Arabic (MSA) or Jordanian dialect. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)",
         reference="https://link.springer.com/chapter/10.1007/978-3-319-60042-0_66/",
         type="Classification",
         category="t2c",
--- a/mteb/tasks/classification/ara/hotel_review_sentiment_classification.py
+++ b/mteb/tasks/classification/ara/hotel_review_sentiment_classification.py
@@ -45,8 +45,7 @@ class HotelReviewSentimentClassificationV2(AbsTaskClassification):
             "path": "mteb/HotelReviewSentimentClassification",
             "revision": "f5e6a24acbed4182114ffdf46747090b3f51e836",
         },
-        description="""HARD is a dataset of Arabic hotel reviews collected from the Booking.com website.
-    This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)""",
+        description="HARD is a dataset of Arabic hotel reviews collected from the Booking.com website. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)",
         reference="https://link.springer.com/chapter/10.1007/978-3-319-67056-0_3",
         type="Classification",
         category="t2c",
--- a/mteb/tasks/classification/ara/online_store_review_sentiment_classification.py
+++ b/mteb/tasks/classification/ara/online_store_review_sentiment_classification.py
@@ -41,8 +41,7 @@ class OnlineStoreReviewSentimentClassificationV2(AbsTaskClassification):
             "path": "mteb/online_store_review_sentiment",
             "revision": "de0e8eed65adf1cbc58f8743a5f5c5df556de4c4",
         },
-        description="""This dataset contains Arabic reviews of products from the SHEIN online store.
-    This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)""",
+        description="This dataset contains Arabic reviews of products from the SHEIN online store. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)",
         reference="https://huggingface.co/datasets/Ruqiya/Arabic_Reviews_of_SHEIN",
         type="Classification",
         category="t2c",
--- a/mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py
+++ b/mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py
@@ -52,8 +52,7 @@ class RestaurantReviewSentimentClassificationV2(AbsTaskClassification):
             "path": "mteb/restaurant_review_sentiment",
             "revision": "5d28c1e8fb393173a849696ed178b90a6f78754a",
         },
-        description="""Dataset of 8156 restaurant reviews from qaym.com in Arabic for sentiment analysis
-    This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)""",
+        description="Dataset of 8156 restaurant reviews from qaym.com in Arabic for sentiment analysis This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)",
         reference="https://link.springer.com/chapter/10.1007/978-3-319-18117-2_2",
         type="Classification",
         category="t2c",
--- a/mteb/tasks/classification/ara/tweet_emotion_classification.py
+++ b/mteb/tasks/classification/ara/tweet_emotion_classification.py
@@ -45,8 +45,7 @@ class TweetEmotionClassificationV2(AbsTaskClassification):
             "path": "mteb/TweetEmotionClassification",
             "revision": "930d65840c089406ceed5241b1a9ba7294e5eeae",
         },
-        description="""A dataset of 10,012 tweets that was created with the aim of covering the most frequently used emotion categories in Arabic tweets.
-    This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)""",
+        description="A dataset of 10,012 tweets that was created with the aim of covering the most frequently used emotion categories in Arabic tweets. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)",
         reference="https://link.springer.com/chapter/10.1007/978-3-319-77116-8_8",
         type="Classification",
         category="t2c",
--- a/mteb/tasks/classification/ara/tweet_sarcasm_classification.py
+++ b/mteb/tasks/classification/ara/tweet_sarcasm_classification.py
@@ -62,8 +62,7 @@ class TweetSarcasmClassificationV2(AbsTaskClassification):
             "path": "mteb/tweet_sarcasm",
             "revision": "3a20898e2ea3303844e907d55f7a815a7644150d",
         },
-        description="""Arabic sarcasm detection dataset, which was created through the reannotation of available Arabic sentiment analysis datasets.
-    This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)""",
+        description="Arabic sarcasm detection dataset, which was created through the reannotation of available Arabic sentiment analysis datasets. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)",
         reference="https://aclanthology.org/2020.osact-1.5/",
         type="Classification",
         category="t2c",
--- a/mteb/tasks/classification/ben/bengali_document_classification.py
+++ b/mteb/tasks/classification/ben/bengali_document_classification.py
@@ -55,8 +55,7 @@ Islam, Tanvir},
 class BengaliDocumentClassificationV2(AbsTaskClassification):
     metadata = TaskMetadata(
         name="BengaliDocumentClassification.v2",
-        description="""Dataset for News Classification, categorized with 13 domains.
-    This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)""",
+        description="Dataset for News Classification, categorized with 13 domains. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)",
         reference="https://aclanthology.org/2023.eacl-main.4",
         dataset={
             "path": "mteb/bengali_document",
--- a/mteb/tasks/classification/ben/bengali_hate_speech_classification.py
+++ b/mteb/tasks/classification/ben/bengali_hate_speech_classification.py
@@ -45,8 +45,7 @@ class BengaliHateSpeechClassification(AbsTaskClassification):
 class BengaliHateSpeechClassificationV2(AbsTaskClassification):
     metadata = TaskMetadata(
         name="BengaliHateSpeechClassification.v2",
-        description="""The Bengali Hate Speech Dataset is a Bengali-language dataset of news articles collected from various Bengali media sources and categorized based on the type of hate in the text.
-    This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)""",
+        description="The Bengali Hate Speech Dataset is a Bengali-language dataset of news articles collected from various Bengali media sources and categorized based on the type of hate in the text. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)",
         reference="https://huggingface.co/datasets/bn_hate_speech",
         dataset={
             "path": "mteb/bengali_hate_speech",