mteb 2.3.3__py3-none-any.whl → 2.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,7 @@ from mteb.benchmarks.benchmarks.benchmarks import (
     FA_MTEB_2,
     HUME,
     JINA_VDR,
+    JMTEB_V2,
     LONG_EMBED,
     MIEB_ENG,
     MIEB_IMG,
@@ -75,6 +76,7 @@ __all__ = [
     "HUME",
     "HUME",
     "JINA_VDR",
+    "JMTEB_V2",
    "LONG_EMBED",
     "MIEB_ENG",
     "MIEB_IMG",
@@ -2562,3 +2562,60 @@ HUME = HUMEBenchmark(
     citation=None,
     contacts=["AdnanElAssadi56", "KennethEnevoldsen", "isaac-chung", "Samoed"],
 )
+
+JMTEB_V2 = Benchmark(
+    name="JMTEB(v2)",
+    display_name="Japanese",
+    icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/jp.svg",
+    tasks=get_tasks(
+        languages=["jpn"],
+        tasks=[
+            # Clustering (3)
+            "LivedoorNewsClustering.v2",
+            "MewsC16JaClustering",
+            "SIB200ClusteringS2S",
+            # Classification (7)
+            "AmazonReviewsClassification",
+            "AmazonCounterfactualClassification",
+            "MassiveIntentClassification",
+            "MassiveScenarioClassification",
+            "JapaneseSentimentClassification",
+            "SIB200Classification",
+            "WRIMEClassification",
+            # STS (2)
+            "JSTS",
+            "JSICK",
+            # Retrieval (11)
+            "JaqketRetrieval",
+            "MrTidyRetrieval",
+            "JaGovFaqsRetrieval",
+            "NLPJournalTitleAbsRetrieval.V2",
+            "NLPJournalTitleIntroRetrieval.V2",
+            "NLPJournalAbsIntroRetrieval.V2",
+            "NLPJournalAbsArticleRetrieval.V2",
+            "JaCWIRRetrieval",
+            "MIRACLRetrieval",
+            "MintakaRetrieval",
+            "MultiLongDocRetrieval",
+            # Reranking (5)
+            "ESCIReranking",
+            "JQaRAReranking",
+            "JaCWIRReranking",
+            "MIRACLReranking",
+            "MultiLongDocReranking",
+        ],
+    ),
+    description="JMTEB is a benchmark for evaluating Japanese text embedding models. In v2, we have extended the benchmark to 28 datasets, enabling more comprehensive evaluation compared with v1 (MTEB(jpn, v1)).",
+    reference="https://github.com/sbintuitions/JMTEB",
+    citation=r"""
+@article{li2025jmteb,
+  author = {Li, Shengzhe and Ohagi, Masaya and Ri, Ryokan and Fukuchi, Akihiko and Shibata, Tomohide and Kawahara, Daisuke},
+  issue = {3},
+  journal = {Vol.2025-NL-265,No.3,1-15},
+  month = {sep},
+  title = {{JMTEB and JMTEB-lite: Japanese Massive Text Embedding Benchmark and Its Lightweight Version}},
+  year = {2025},
+}
+""",
+    contacts=["lsz05"],
+)
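For orientation, a minimal usage sketch (not part of the diff): selecting the new benchmark by name and running it on a model. get_benchmark and get_model are existing mteb entry points; the mteb.evaluate call and its exact signature are an assumption about the mteb 2.x API, so check the documentation before relying on it.

import mteb

benchmark = mteb.get_benchmark("JMTEB(v2)")  # resolves the Benchmark defined above
model = mteb.get_model("cl-nagoya/ruri-v3-310m")  # any registered model name
results = mteb.evaluate(model, tasks=benchmark.tasks)  # signature assumed, not verified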
mteb/cache.py CHANGED
@@ -243,7 +243,11 @@ class ResultCache:
             f"No results repository found in {results_directory}, cloning it from {remote}"
         )
 
-        subprocess.run(["git", "clone", remote, "remote"], cwd=self.cache_path)
+        subprocess.run(
+            ["git", "clone", "--depth", "1", remote, "remote"],
+            cwd=self.cache_path,
+            check=True,
+        )
 
         return results_directory
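The clone of the results repository is now shallow and failures are no longer silent. A standalone illustration of the two new arguments (hypothetical URL, not from the package):

import subprocess

try:
    # --depth 1 fetches only the latest commit, keeping the download small;
    # check=True raises CalledProcessError instead of ignoring a non-zero exit code.
    subprocess.run(
        ["git", "clone", "--depth", "1", "https://example.com/results.git", "remote"],
        check=True,
    )
except subprocess.CalledProcessError as err:
    print(f"clone failed with exit code {err.returncode}")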
 
@@ -0,0 +1,60 @@
+{
+  "test": {
+    "num_samples": 1085,
+    "number_texts_intersect_with_train": 0,
+    "text_statistics": {
+      "total_text_length": 115359,
+      "min_text_length": 8,
+      "average_text_length": 106.32165898617511,
+      "max_text_length": 2722,
+      "unique_texts": 1085
+    },
+    "image_statistics": null,
+    "label_statistics": {
+      "min_labels_per_text": 1,
+      "average_label_per_text": 1.0,
+      "max_labels_per_text": 1,
+      "unique_labels": 3,
+      "labels": {
+        "0": {
+          "count": 868
+        },
+        "1": {
+          "count": 190
+        },
+        "2": {
+          "count": 27
+        }
+      }
+    }
+  },
+  "train": {
+    "num_samples": 7176,
+    "number_texts_intersect_with_train": null,
+    "text_statistics": {
+      "total_text_length": 830248,
+      "min_text_length": 5,
+      "average_text_length": 115.69788182831661,
+      "max_text_length": 4759,
+      "unique_texts": 7176
+    },
+    "image_statistics": null,
+    "label_statistics": {
+      "min_labels_per_text": 1,
+      "average_label_per_text": 1.0,
+      "max_labels_per_text": 1,
+      "unique_labels": 3,
+      "labels": {
+        "0": {
+          "count": 4933
+        },
+        "1": {
+          "count": 2047
+        },
+        "2": {
+          "count": 196
+        }
+      }
+    }
+  }
+}
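This new file follows the per-split descriptive-statistics structure mteb ships with each task. As a rough sketch of how the text_statistics block is derived from a list of texts (illustrative code, not taken from the package):

texts = ["first comment", "another comment", "first comment"]
lengths = [len(t) for t in texts]
text_statistics = {
    "total_text_length": sum(lengths),
    "min_text_length": min(lengths),
    "average_text_length": sum(lengths) / len(lengths),
    "max_text_length": max(lengths),
    "unique_texts": len(set(texts)),
}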
@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import difflib
 import logging
+import warnings
 from collections.abc import Iterable
 from typing import TYPE_CHECKING, Any
 
@@ -180,6 +181,14 @@ def _model_meta_from_hf_hub(model_name: str) -> ModelMeta:
     if card_data.get("library_name", None) == "sentence-transformers":
         frameworks.append("Sentence Transformers")
         loader = sentence_transformers_loader
+    else:
+        msg = (
+            "Model library not recognized, defaulting to Sentence Transformers loader."
+        )
+        logger.warning(msg)
+        warnings.warn(msg)
+        loader = sentence_transformers_loader
+
     revision = card_data.get("base_model_revision", None)
     license = card_data.get("license", None)
     return ModelMeta(
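With this change, a model card without a recognized library_name falls back to the Sentence Transformers loader and emits a warning instead of failing silently. A hedged sketch of escalating that warning to an error, for example in tests (the model id is hypothetical):

import warnings

import mteb

with warnings.catch_warnings():
    warnings.simplefilter("error")  # turn warnings.warn(...) into an exception
    meta = mteb.get_model_meta("some-org/model-without-library-name")  # hypothetical id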
@@ -0,0 +1,312 @@
+from mteb.models.model_meta import ModelMeta
+from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
+
+RURI_V3_PROMPTS = {
+    "Retrieval-query": "検索クエリ: ",
+    "Retrieval-document": "検索文書: ",
+    "Reranking-query": "検索クエリ: ",
+    "Reranking-document": "検索文書: ",
+    "Classification": "トピック: ",
+    "Clustering": "トピック: ",
+}
+
+RURI_V1_V2_PROMPTS = {
+    "query": "クエリ: ",
+    "document": "文章: ",
+}
+
+
+RURI_CITATION = r"""@misc{Ruri,
+  title={{Ruri: Japanese General Text Embeddings}},
+  author={Hayato Tsukagoshi and Ryohei Sasano},
+  year={2024},
+  eprint={2409.07737},
+  archivePrefix={arXiv},
+  primaryClass={cs.CL},
+  url={https://arxiv.org/abs/2409.07737},
+}"""
+
+cl_nagoya_ruri_v3_30m = ModelMeta(
+    loader=sentence_transformers_loader,
+    loader_kwargs=dict(
+        model_prompts=RURI_V3_PROMPTS,
+    ),
+    name="cl-nagoya/ruri-v3-30m",
+    languages=["jpn-Jpan"],
+    open_weights=True,
+    revision="24899e5de370b56d179604a007c0d727bf144504",
+    release_date="2025-04-07",
+    n_parameters=36_705_536,
+    memory_usage_mb=140,
+    embed_dim=256,
+    license="apache-2.0",
+    max_tokens=8192,
+    reference="https://huggingface.co/cl-nagoya/ruri-v3-30m",
+    similarity_fn_name="cosine",
+    framework=["PyTorch", "Sentence Transformers"],
+    use_instructions=True,
+    superseded_by=None,
+    training_datasets={
+        "cl-nagoya/ruri-v3-dataset-ft",
+    },
+    adapted_from="sbintuitions/modernbert-ja-30m",
+    public_training_code=None,
+    public_training_data="https://huggingface.co/datasets/cl-nagoya/ruri-v3-dataset-ft",
+    citation=RURI_CITATION,
+    contacts=["hpprc"],
+)
+
+cl_nagoya_ruri_v3_70m = ModelMeta(
+    loader=sentence_transformers_loader,
+    loader_kwargs=dict(
+        model_prompts=RURI_V3_PROMPTS,
+    ),
+    name="cl-nagoya/ruri-v3-70m",
+    languages=["jpn-Jpan"],
+    open_weights=True,
+    revision="07a8b0aba47d29d2ca21f89b915c1efe2c23d1cc",
+    release_date="2025-04-09",
+    n_parameters=36_705_536,
+    memory_usage_mb=140,
+    embed_dim=256,
+    license="apache-2.0",
+    max_tokens=8192,
+    reference="https://huggingface.co/cl-nagoya/ruri-v3-70m",
+    similarity_fn_name="cosine",
+    framework=["PyTorch", "Sentence Transformers"],
+    use_instructions=True,
+    superseded_by=None,
+    training_datasets={"MrTidyRetrieval", "MIRACLRetrieval"},
+    adapted_from="sbintuitions/modernbert-ja-70m",
+    public_training_code=None,
+    public_training_data="https://huggingface.co/datasets/cl-nagoya/ruri-v3-dataset-ft",
+    citation=RURI_CITATION,
+    contacts=["hpprc"],
+)
+
+cl_nagoya_ruri_v3_130m = ModelMeta(
+    loader=sentence_transformers_loader,
+    loader_kwargs=dict(
+        model_prompts=RURI_V3_PROMPTS,
+    ),
+    name="cl-nagoya/ruri-v3-130m",
+    languages=["jpn-Jpan"],
+    open_weights=True,
+    revision="e3114c6ee10dbab8b4b235fbc6dcf9dd4d5ac1a6",
+    release_date="2025-04-09",
+    n_parameters=132_140_544,
+    memory_usage_mb=504,
+    embed_dim=512,
+    license="apache-2.0",
+    max_tokens=8192,
+    reference="https://huggingface.co/cl-nagoya/ruri-v3-130m",
+    similarity_fn_name="cosine",
+    framework=["PyTorch", "Sentence Transformers"],
+    use_instructions=True,
+    superseded_by=None,
+    training_datasets={"MrTidyRetrieval", "MIRACLRetrieval"},
+    adapted_from="sbintuitions/modernbert-ja-130m",
+    public_training_code=None,
+    public_training_data="https://huggingface.co/datasets/cl-nagoya/ruri-v3-dataset-ft",
+    citation=RURI_CITATION,
+    contacts=["hpprc"],
+)
+
+cl_nagoya_ruri_v3_310m = ModelMeta(
+    loader=sentence_transformers_loader,
+    loader_kwargs=dict(
+        model_prompts=RURI_V3_PROMPTS,
+    ),
+    name="cl-nagoya/ruri-v3-310m",
+    languages=["jpn-Jpan"],
+    open_weights=True,
+    revision="18b60fb8c2b9df296fb4212bb7d23ef94e579cd3",
+    release_date="2025-04-09",
+    n_parameters=314_611_968,
+    memory_usage_mb=1200,
+    embed_dim=768,
+    license="apache-2.0",
+    max_tokens=8192,
+    reference="https://huggingface.co/cl-nagoya/ruri-v3-310m",
+    similarity_fn_name="cosine",
+    framework=["PyTorch", "Sentence Transformers"],
+    use_instructions=True,
+    superseded_by=None,
+    training_datasets={"MrTidyRetrieval", "MIRACLRetrieval"},
+    adapted_from="sbintuitions/modernbert-ja-310m",
+    public_training_code=None,
+    public_training_data="https://huggingface.co/datasets/cl-nagoya/ruri-v3-dataset-ft",
+    citation=RURI_CITATION,
+    contacts=["hpprc"],
+)
+
+cl_nagoya_ruri_small_v2 = ModelMeta(
+    loader=sentence_transformers_loader,
+    loader_kwargs=dict(
+        model_prompts=RURI_V1_V2_PROMPTS,
+        trust_remote_code=True,
+    ),
+    name="cl-nagoya/ruri-small-v2",
+    languages=["jpn-Jpan"],
+    open_weights=True,
+    revision="db18646e673b713cd0518a5bb0fefdce21e77cd9",
+    release_date="2024-12-05",
+    n_parameters=68_087_808,
+    memory_usage_mb=260,
+    embed_dim=768,
+    license="apache-2.0",
+    max_tokens=512,
+    reference="https://huggingface.co/cl-nagoya/ruri-small-v2",
+    similarity_fn_name="cosine",
+    framework=["PyTorch", "Sentence Transformers"],
+    use_instructions=True,
+    adapted_from="line-corporation/line-distilbert-base-japanese",
+    superseded_by=None,
+    training_datasets={"MrTidyRetrieval", "MIRACLRetrieval"},
+    public_training_code=None,
+    public_training_data="https://huggingface.co/datasets/cl-nagoya/ruri-dataset-v2-ft",
+    citation=RURI_CITATION,
+    contacts=["hpprc"],
+)
+
+cl_nagoya_ruri_base_v2 = ModelMeta(
+    loader=sentence_transformers_loader,
+    loader_kwargs=dict(
+        model_prompts=RURI_V1_V2_PROMPTS,
+    ),
+    name="cl-nagoya/ruri-base-v2",
+    languages=["jpn-Jpan"],
+    open_weights=True,
+    revision="8ce03882903668a01c83ca3b8111ac025a3bc734",
+    release_date="2024-12-05",
+    n_parameters=111_207_168,
+    memory_usage_mb=424,
+    embed_dim=768,
+    license="apache-2.0",
+    max_tokens=512,
+    reference="https://huggingface.co/cl-nagoya/ruri-base-v2",
+    similarity_fn_name="cosine",
+    framework=["PyTorch", "Sentence Transformers"],
+    use_instructions=True,
+    adapted_from="tohoku-nlp/bert-base-japanese-v3",
+    superseded_by=None,
+    training_datasets=None,
+    public_training_code=None,
+    public_training_data="https://huggingface.co/datasets/cl-nagoya/ruri-dataset-v2-ft",
+    citation=RURI_CITATION,
+    contacts=["hpprc"],
+)
+
+cl_nagoya_ruri_large_v2 = ModelMeta(
+    loader=sentence_transformers_loader,
+    loader_kwargs=dict(
+        model_prompts=RURI_V1_V2_PROMPTS,
+    ),
+    name="cl-nagoya/ruri-large-v2",
+    languages=["jpn-Jpan"],
+    open_weights=True,
+    revision="42898ef34a5574977380ebf0dfd28cbfbd36438b",
+    release_date="2024-12-06",
+    n_parameters=337_441_792,
+    memory_usage_mb=1287,
+    embed_dim=1024,
+    license="apache-2.0",
+    max_tokens=512,
+    reference="https://huggingface.co/cl-nagoya/ruri-large-v2",
+    similarity_fn_name="cosine",
+    framework=["PyTorch", "Sentence Transformers"],
+    use_instructions=True,
+    adapted_from="tohoku-nlp/bert-large-japanese-v2",
+    superseded_by=None,
+    training_datasets=None,
+    public_training_code=None,
+    public_training_data="https://huggingface.co/datasets/cl-nagoya/ruri-dataset-v2-ft",
+    citation=RURI_CITATION,
+    contacts=["hpprc"],
+)
+
+cl_nagoya_ruri_small_v1 = ModelMeta(
+    loader=sentence_transformers_loader,
+    loader_kwargs=dict(
+        model_prompts=RURI_V1_V2_PROMPTS,
+        trust_remote_code=True,
+    ),
+    name="cl-nagoya/ruri-small",
+    languages=["jpn-Jpan"],
+    open_weights=True,
+    revision="bc56ce90cd7a979f6eb199fc52dfe700bfd94bc3",
+    release_date="2024-08-28",
+    n_parameters=68_087_808,
+    memory_usage_mb=130,
+    embed_dim=768,
+    license="apache-2.0",
+    max_tokens=512,
+    reference="https://huggingface.co/cl-nagoya/ruri-small",
+    similarity_fn_name="cosine",
+    framework=["PyTorch", "Sentence Transformers"],
+    use_instructions=True,
+    adapted_from="line-corporation/line-distilbert-base-japanese",
+    superseded_by="cl-nagoya/ruri-small-v2",
+    training_datasets=None,
+    public_training_code=None,
+    public_training_data="https://huggingface.co/datasets/cl-nagoya/ruri-dataset-ft",
+    citation=RURI_CITATION,
+    contacts=["hpprc"],
+)
+
+cl_nagoya_ruri_base_v1 = ModelMeta(
+    loader=sentence_transformers_loader,
+    loader_kwargs=dict(
+        model_prompts=RURI_V1_V2_PROMPTS,
+    ),
+    name="cl-nagoya/ruri-base",
+    languages=["jpn-Jpan"],
+    open_weights=True,
+    revision="1ae40b8b6c78518a499425086bab8fc16c2e4b0e",
+    release_date="2024-08-28",
+    n_parameters=111_207_168,
+    memory_usage_mb=212,
+    embed_dim=768,
+    license="apache-2.0",
+    max_tokens=512,
+    reference="https://huggingface.co/cl-nagoya/ruri-base",
+    similarity_fn_name="cosine",
+    framework=["PyTorch", "Sentence Transformers"],
+    use_instructions=True,
+    adapted_from="tohoku-nlp/bert-base-japanese-v3",
+    superseded_by="cl-nagoya/ruri-base-v2",
+    training_datasets=None,
+    public_training_code=None,
+    public_training_data="https://huggingface.co/datasets/cl-nagoya/ruri-dataset-ft",
+    citation=RURI_CITATION,
+    contacts=["hpprc"],
+)
+
+
+cl_nagoya_ruri_large_v1 = ModelMeta(
+    loader=sentence_transformers_loader,
+    loader_kwargs=dict(
+        model_prompts=RURI_V1_V2_PROMPTS,
+    ),
+    name="cl-nagoya/ruri-large",
+    languages=["jpn-Jpan"],
+    open_weights=True,
+    revision="a011c39b13e8bc137ee13c6bc82191ece46c414c",
+    release_date="2024-08-28",
+    n_parameters=337_441_792,
+    memory_usage_mb=644,
+    embed_dim=1024,
+    license="apache-2.0",
+    max_tokens=512,
+    reference="https://huggingface.co/cl-nagoya/ruri-large",
+    similarity_fn_name="cosine",
+    framework=["PyTorch", "Sentence Transformers"],
+    use_instructions=True,
+    adapted_from="tohoku-nlp/bert-large-japanese-v2",
+    superseded_by="cl-nagoya/ruri-large-v2",
+    training_datasets=None,
+    public_training_code=None,
+    public_training_data="https://huggingface.co/datasets/cl-nagoya/ruri-dataset-ft",
+    citation=RURI_CITATION,
+    contacts=["hpprc"],
+)
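The prompt dictionaries at the top of this new file encode the literal Japanese prefixes the Ruri models expect; inside mteb they are applied via model_prompts, and outside mteb the same prefixes can be added by hand. A small sketch assuming sentence-transformers >= 3.0 (behavior taken from the model cards, not from this diff):

from sentence_transformers import SentenceTransformer

model = SentenceTransformer("cl-nagoya/ruri-v3-30m")
query = "検索クエリ: " + "瑠璃色はどんな色?"
document = "検索文書: " + "瑠璃色は紫みを帯びた濃い青色のこと。"
embeddings = model.encode([query, document])
print(model.similarity(embeddings[0:1], embeddings[1:2]))  # cosine, per similarity_fn_name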
@@ -250,7 +250,7 @@ class SearchEncoderWrapper:
 
         # get top-k values
         cos_scores_top_k_values, cos_scores_top_k_idx = torch.topk(
-            torch.tensor(scores),
+            torch.as_tensor(scores),
             min(
                 top_k + 1,
                 len(scores[1]) if len(scores) > 1 else len(scores[-1]),
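The one-token change above swaps a copying constructor for a non-copying one: torch.tensor() always copies its input (and warns when the input is already a tensor), while torch.as_tensor() reuses the existing buffer when dtype and device allow. Illustration (not from the package):

import numpy as np
import torch

scores = np.random.rand(4, 100).astype(np.float32)
copied = torch.tensor(scores)      # always allocates and copies
shared = torch.as_tensor(scores)   # wraps the existing NumPy buffer, no copy
values, indices = torch.topk(shared, k=10)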
@@ -1,6 +1,11 @@
 from .hebrew_sentiment_analysis import (
     HebrewSentimentAnalysis,
     HebrewSentimentAnalysisV2,
+    HebrewSentimentAnalysisV3,
 )
 
-__all__ = ["HebrewSentimentAnalysis", "HebrewSentimentAnalysisV2"]
+__all__ = [
+    "HebrewSentimentAnalysis",
+    "HebrewSentimentAnalysisV2",
+    "HebrewSentimentAnalysisV3",
+]
@@ -9,7 +9,12 @@ class HebrewSentimentAnalysis(AbsTaskClassification):
             "path": "mteb/HebrewSentimentAnalysis",
             "revision": "03eb0996c8234e0d8cd7206bf4763815deda12ed",
         },
-        description="HebrewSentiment is a data set consists of 12,804 user comments to posts on the official Facebook page of Israel’s president, Mr. Reuven Rivlin. In October 2015, we used the open software application Netvizz (Rieder, 2013) to scrape all the comments to all of the president’s posts in the period of June – August 2014, the first three months of Rivlin’s presidency.2 While the president’s posts aimed at reconciling tensions and called for tolerance and empathy, the sentiment expressed in the comments to the president’s posts was polarized between citizens who warmly thanked the president, and citizens that fiercely critiqued his policy.",
+        description=(
+            "HebrewSentiment is a data set consists of 12,804 user comments to posts on the official Facebook page of Israel’s president, Mr. Reuven Rivlin. "
+            "In October 2015, we used the open software application Netvizz (Rieder, 2013) to scrape all the comments to all of the president’s posts in the period of June – August 2014, "
+            "the first three months of Rivlin’s presidency.2 While the president’s posts aimed at reconciling tensions and called for tolerance and empathy, "
+            "the sentiment expressed in the comments to the president’s posts was polarized between citizens who warmly thanked the president, and citizens that fiercely critiqued his policy. "
+        ),
         reference="https://huggingface.co/datasets/hebrew_sentiment",
         type="Classification",
         category="t2c",
@@ -37,7 +42,7 @@ class HebrewSentimentAnalysis(AbsTaskClassification):
   year = {2018},
 }
 """,
-        superseded_by="HebrewSentimentAnalysis.v2",
+        superseded_by="HebrewSentimentAnalysis.v3",
     )
 
 
@@ -49,7 +54,61 @@ class HebrewSentimentAnalysisV2(AbsTaskClassification):
             "revision": "7ecd049fc8ac0d6f0a0121c8ff9fe44ea5bd935b",
             "name": "morph",
         },
-        description="HebrewSentiment is a data set consists of 12,804 user comments to posts on the official Facebook page of Israel’s president, Mr. Reuven Rivlin. In October 2015, we used the open software application Netvizz (Rieder, 2013) to scrape all the comments to all of the president’s posts in the period of June – August 2014, the first three months of Rivlin’s presidency.2 While the president’s posts aimed at reconciling tensions and called for tolerance and empathy, the sentiment expressed in the comments to the president’s posts was polarized between citizens who warmly thanked the president, and citizens that fiercely critiqued his policy. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
+        description=(
+            "HebrewSentiment is a data set consists of 12,804 user comments to posts on the official Facebook page of Israel’s president, Mr. Reuven Rivlin. "
+            "In October 2015, we used the open software application Netvizz (Rieder, 2013) to scrape all the comments to all of the president’s posts in the period of June – August 2014, "
+            "the first three months of Rivlin’s presidency.2 While the president’s posts aimed at reconciling tensions and called for tolerance and empathy, "
+            "the sentiment expressed in the comments to the president’s posts was polarized between citizens who warmly thanked the president, and citizens that fiercely critiqued his policy. "
+            "This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)"
+        ),
+        reference="https://huggingface.co/datasets/hebrew_sentiment",
+        type="Classification",
+        category="t2c",
+        modalities=["text"],
+        eval_splits=["test"],
+        eval_langs=["heb-Hebr"],
+        main_score="accuracy",
+        date=("2015-10-01", "2015-10-31"),
+        domains=["Reviews", "Written"],
+        task_subtypes=["Sentiment/Hate speech"],
+        license="mit",
+        annotations_creators="expert-annotated",
+        dialect=[],
+        sample_creation="found",
+        bibtex_citation=r"""
+@inproceedings{amram-etal-2018-representations,
+  address = {Santa Fe, New Mexico, USA},
+  author = {Amram, Adam and Ben David, Anat and Tsarfaty, Reut},
+  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
+  month = aug,
+  pages = {2242--2252},
+  publisher = {Association for Computational Linguistics},
+  title = {Representations and Architectures in Neural Sentiment Analysis for Morphologically Rich Languages: A Case Study from {M}odern {H}ebrew},
+  url = {https://www.aclweb.org/anthology/C18-1190},
+  year = {2018},
+}
+""",
+        adapted_from=["HebrewSentimentAnalysis"],
+        superseded_by="HebrewSentimentAnalysis.v3",
+    )
+
+
+class HebrewSentimentAnalysisV3(AbsTaskClassification):
+    label_column_name = "labels"
+    metadata = TaskMetadata(
+        name="HebrewSentimentAnalysis.v3",
+        dataset={
+            "path": "mteb/HebrewSentimentAnalysisV4",
+            "revision": "aa0b83c4b16cd28daf7c41ef3402e3ffe9c70c59",
+        },
+        description=(
+            "HebrewSentiment is a data set consists of 12,804 user comments to posts on the official Facebook page of Israel’s president, Mr. Reuven Rivlin. "
+            "In October 2015, we used the open software application Netvizz (Rieder, 2013) to scrape all the comments to all of the president’s posts in the period of June – August 2014, "
+            "the first three months of Rivlin’s presidency.2 While the president’s posts aimed at reconciling tensions and called for tolerance and empathy, "
+            "the sentiment expressed in the comments to the president’s posts was polarized between citizens who warmly thanked the president, and citizens that fiercely critiqued his policy. "
+            "This version corrects texts (took pre-tokenized) [more details in this thread](https://huggingface.co/datasets/mteb/HebrewSentimentAnalysis/discussions/2). "
+            "This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)"
+        ),
         reference="https://huggingface.co/datasets/hebrew_sentiment",
         type="Classification",
         category="t2c",
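A short sketch (not part of the diff) of selecting the new task revision with get_tasks, the same helper the benchmark definitions above already use:

import mteb

tasks = mteb.get_tasks(tasks=["HebrewSentimentAnalysis.v3"])  # resolves the v3 task by name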
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mteb
-Version: 2.3.3
+Version: 2.3.5
 Summary: Massive Text Embedding Benchmark
 Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
 Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
@@ -5,7 +5,7 @@ mteb/_helpful_enum.py,sha256=jh73N1jlcpg7RGz4bj8UpctiMNvqvHpp9wrB7SYEzIU,510
 mteb/_log_once.py,sha256=-tUKzxGQzf2LZSuQXi97oYFXMta1B6GEYXd7BPqssvY,1095
 mteb/_requires_package.py,sha256=eHg_TD9BVZRzNCcQQrUP17d8M1DF_vOd_tVx54AmAnM,3017
 mteb/_set_seed.py,sha256=HPlPRl__Pe6IG-4UgJqTfplcivJ_wA2kaClbXoHQedM,1178
-mteb/cache.py,sha256=77GtlwqHZxqkoCYcu76KCFL4AnHHkH8w-JY3oglMrbc,20102
+mteb/cache.py,sha256=XiFuhjZ2C-o0LgP1YM8g9As_vigJCUNfTrOb9-EiFlM,20177
 mteb/deprecated_evaluator.py,sha256=t13Eluvm5ByVIOqgT7fqiVfLb8Ud3A4bbF2djRfs8iA,26901
 mteb/evaluate.py,sha256=B60CkqRHzkI-3zIfHyocp-YUeWrzeoOvX_RN5vSlGqE,19363
 mteb/filter_tasks.py,sha256=5XE1OYmgDDoJYnXwFf4ma_PIT_Lekzs420sQF_kpCiY,7240
@@ -55,8 +55,8 @@ mteb/benchmarks/__init__.py,sha256=MQEVeli-zLaJ7Xg0z7RhXQwsdmm7Ht_W2Ln0rZo1Szc,2
 mteb/benchmarks/_create_table.py,sha256=OAiR44ynJ2fMzoBmVITQtOTYQzxIu9KUdS_HzlBlAck,20195
 mteb/benchmarks/benchmark.py,sha256=70RlMyyg_wkWTlU_IbfLl-KaqRWXGCKTd8fWe9X-AQE,4173
 mteb/benchmarks/get_benchmark.py,sha256=-n_O-gitRKZi48gJKNgGuI36hsP7yLVSiwulnMHN7Gw,3935
-mteb/benchmarks/benchmarks/__init__.py,sha256=0ySgD14Mu3Y1nJzazR_eUir81ia3x6E23N57SzQNkF0,2150
-mteb/benchmarks/benchmarks/benchmarks.py,sha256=Ob2cHVXwFk328xbV-2ZmUibiVAMtT2RN1ygGgiP6UNQ,92662
+mteb/benchmarks/benchmarks/__init__.py,sha256=Ig5dSFunzI-F-OamruuKJVSstbG3xQNkXCxRY3Bj_Ck,2180
+mteb/benchmarks/benchmarks/benchmarks.py,sha256=qHHmJfisT75VRVoZfPcHhShCG0jY6vSWZEx-D01XxKU,94757
 mteb/benchmarks/benchmarks/rteb_benchmarks.py,sha256=QnCSrTTaBfcRlAQp2Nu81tgv1idMXqiM16Fp2zKJ5Ys,10607
 mteb/cli/__init__.py,sha256=v-csUr3eUZElIvrGB6QGtaIdndDfNWEe9oZchsGsJpg,64
 mteb/cli/_display_tasks.py,sha256=7A06dT9sSoTz6shyMvskPxuc5eHY_H7PGPlROzMP0yw,2196
@@ -252,6 +252,7 @@ mteb/descriptive_stats/Classification/HeadlineClassification.json,sha256=VfTqah7
 mteb/descriptive_stats/Classification/HeadlineClassification.v2.json,sha256=n-KiCmlKXb5QOzNG9QTdjwYR-cRV3Qvn96KOF2so7Cs,2110
 mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.json,sha256=j517xsonPntbr4k5Pa9ftGGIZqzaOLDvPqeGjWngdyI,1659
 mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v2.json,sha256=FaxgCr_lbS-ppsd_cEp_87kbCRCGqFUDMZv375XGvdk,1658
+mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json,sha256=75u2ZWek2BT8JQElPqBHvCqTEXbeTwlrXcyQC5NK-wU,1658
 mteb/descriptive_stats/Classification/HinDialectClassification.json,sha256=HK13-QJaWc0uC4wOqhZHKzT6i05_wJhURdUJ6Upq8OQ,4745
 mteb/descriptive_stats/Classification/HindiDiscourseClassification.json,sha256=4XMoJL46RHYKYKdndxD5lZ2b3hGjL2CArx3eRalO8eg,1049
 mteb/descriptive_stats/Classification/HindiDiscourseClassification.v2.json,sha256=MGz2Ntd2JxyHtlP3GhkhbMQ_yH2Eladd4vZSFOme2K8,2088
@@ -1431,11 +1432,11 @@ mteb/leaderboard/table.py,sha256=6SnrYC5GcBlvVSO6vOk6ObuqtoveBLv3JUuXqdKueG8,833
 mteb/leaderboard/text_segments.py,sha256=iMIkS04QQjPbT-SkU0x6fOcS8xRbUYevryu9HydipKM,6570
 mteb/models/__init__.py,sha256=ABTuoqiBjBtBWW3LYY7ItBHdylR6jWoy06HH0g6j6fU,910
 mteb/models/abs_encoder.py,sha256=m0JkRfRPMYadDgBR9eozRloI31ZSWkSzDFINpwbfLZk,16533
-mteb/models/get_model_meta.py,sha256=VpZZNINk-QrNeVpPZnlqzlLhtBs8G84eRwTzAb_gRD4,9108
+mteb/models/get_model_meta.py,sha256=GeofphZ8wFtwAHYQipgQlZzxNIFAVFGzo_E2sMzjZTc,9350
 mteb/models/instruct_wrapper.py,sha256=Ty4nfEvioycL_uATkhd0PGuyeB5Xc9xrRd6HOGgb-tc,9005
 mteb/models/model_meta.py,sha256=b-Nel9nX5bJk4cgJnqkBzEKyMY7uXvxlCBSxmmH1Ios,14769
 mteb/models/models_protocols.py,sha256=D2hYWn_UBGMaKtRwBx3u0B0ni6lHJjSzTxX21XFNwIc,8917
-mteb/models/search_wrappers.py,sha256=AcMhjQyKdeitUjnaqgnP3_zTeVSum8rz1sjBRddHUVQ,20328
+mteb/models/search_wrappers.py,sha256=zpCvxUVNQWekyC4Fiz7mvlI0VPdSrFq41A0GrCDvBK4,20331
 mteb/models/sentence_transformer_wrapper.py,sha256=n5CMsM6Lpg_CFHH0NkpJusMsaLUTt-L9vRmFINQ961k,12338
 mteb/models/cache_wrappers/__init__.py,sha256=1w1TnMwulWJSzNkLXjbh5MY3sqgHWc6vUntYn49i9X8,169
 mteb/models/cache_wrappers/cache_backend_protocol.py,sha256=TR7kD7KbN1J4piszIecpegtLZYGy7sRHZt3SDWlImKk,1665
@@ -1534,6 +1535,7 @@ mteb/models/model_implementations/rerankers_custom.py,sha256=ro73A9-hHudy3_qIMrh
 mteb/models/model_implementations/rerankers_monot5_based.py,sha256=rxVwzapNnHl4gCw79XVCaTXj3-wbToyj7XVL97tpAF4,34302
 mteb/models/model_implementations/richinfoai_models.py,sha256=llvYa0JUjyOOMbuTgOYoJ2qeqZ5rLHX1ZjZIYlYbdvA,989
 mteb/models/model_implementations/ru_sentence_models.py,sha256=GuZFwbzaooufvSMGNjIsL0DDLrqHjhdSsAQHHZo5H08,40480
+mteb/models/model_implementations/ruri_models.py,sha256=-BTYkZ8dEWZUbGqx3YB5yFSrzMwZtXX7sMUHzrlB8ws,10043
 mteb/models/model_implementations/salesforce_models.py,sha256=KslTK-IKeLvNG-vQir9k6swkaOgjk6eyozm_BOVgTpY,5160
 mteb/models/model_implementations/samilpwc_models.py,sha256=oMwKNwCxoH1jZgCy04oo2oVlBZWu253QMpnEEC6emz8,2021
 mteb/models/model_implementations/searchmap_models.py,sha256=XvVl99emIgnNUCxkTuFQXW6py2R8vgsArfpyHveCugw,1904
@@ -1722,8 +1724,8 @@ mteb/tasks/classification/fra/french_book_reviews.py,sha256=Fsx8UznQVNDNUhcdsTeN
 mteb/tasks/classification/fra/movie_review_sentiment_classification.py,sha256=ov-fbReWP9T_RqhxFtS-gjNaZmbM9J8gQdBQyci5yqU,3290
 mteb/tasks/classification/guj/__init__.py,sha256=HZfimpBCywBLi5VGof_A9Ua6bqtMUoWGRhM1eqAEWKE,186
 mteb/tasks/classification/guj/gujarati_news_classification.py,sha256=VEdbzqlw8b8N8R3TQc275iiCxqGLAMAM8Nf_N7FnUGA,2303
-mteb/tasks/classification/heb/__init__.py,sha256=Wa9-nATstuSXN2OFsO3tF0BZdlk3AzM0g9R1VzZUTc0,171
-mteb/tasks/classification/heb/hebrew_sentiment_analysis.py,sha256=1DPoxWdGAZJB7LwEQWJzpR8KEg2fpbgGXe_cGv1bo5M,4523
+mteb/tasks/classification/heb/__init__.py,sha256=xQNtDxjUsCXQhA_ZYO_4kpBtHWQSBhWhQwAuWcTo6GE,246
+mteb/tasks/classification/heb/hebrew_sentiment_analysis.py,sha256=2wmKwq4Z4YKTG3QieuxmUE56ofiSOV63kV-jh8Zomh4,7281
 mteb/tasks/classification/hin/__init__.py,sha256=KdScMtYYmjsali0InoHP0PKQ8yCbaD-j2tLqc7_lhlo,356
 mteb/tasks/classification/hin/hindi_discourse_classification.py,sha256=HXLJZFEpnI7UjwA-NuadhrCplqaLiSb8Npla_6oXC48,3717
 mteb/tasks/classification/hin/sentiment_analysis_hindi.py,sha256=YIeEuNa2UTr8Jwh_wx15broRuD3NHNansGq7Bl9Vjl0,3101
@@ -2569,9 +2571,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
 mteb/types/_result.py,sha256=CRAUc5IvqI3_9SyXDwv-PWLCXwXdZem9RePeYESRtuw,996
 mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
 mteb/types/statistics.py,sha256=YwJsxTf1eaCI_RE-J37a-gK5wDeGAsmkeZKoZCFihSo,3755
-mteb-2.3.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-mteb-2.3.3.dist-info/METADATA,sha256=LbvRqywjhaqAK4910G8ueME52YrrqFzvm4NXl2M3MBA,13923
-mteb-2.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-mteb-2.3.3.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
-mteb-2.3.3.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
-mteb-2.3.3.dist-info/RECORD,,
+mteb-2.3.5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+mteb-2.3.5.dist-info/METADATA,sha256=Ud-HNDLgXkrYqVQczyt-TNpev3LR1rBhRDPKK3Dn_T0,13923
+mteb-2.3.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+mteb-2.3.5.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
+mteb-2.3.5.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
+mteb-2.3.5.dist-info/RECORD,,
File without changes